Search | arXiv e-print repository
Showing 1–50 of 379 results for author: Lin, G

Searching in archive cs. Results sorted by announcement date (newest first), 50 per page. (Search v0.5.6, released 2020-02-24.)
1. arXiv:2502.17972 [pdf, other] cs.LG (Machine Learning)
   Model-Free Adversarial Purification via Coarse-To-Fine Tensor Network Representation
   Authors: Guang Lin, Duc Thien Nguyen, Zerui Tao, Konstantinos Slavakis, Toshihisa Tanaka, Qibin Zhao
   Abstract: Deep neural networks are known to be vulnerable to well-designed adversarial attacks. Although numerous defense strategies have been proposed, many are tailored to specific attacks or tasks and often fail to generalize across diverse scenarios. In this paper, we propose Tensor Network Purification (TNP), a novel model-free adversarial purification method based on a specially designed tensor network decomposition algorithm. TNP depends neither on a pre-trained generative model nor on a specific dataset, resulting in strong robustness across diverse adversarial scenarios. To this end, the key challenge lies in relaxing the Gaussian-noise assumptions of classical decompositions and accommodating the unknown distribution of adversarial perturbations. Unlike the low-rank representation of classical decompositions, TNP aims to reconstruct the unobserved clean example from an adversarial example. Specifically, TNP leverages progressive downsampling and introduces a novel adversarial optimization objective that minimizes reconstruction error without inadvertently restoring adversarial perturbations. Extensive experiments conducted on CIFAR-10, CIFAR-100, and ImageNet demonstrate that our method generalizes effectively across various norm threats, attack types, and tasks, providing a versatile and promising adversarial purification technique.
   Submitted 25 February, 2025; originally announced February 2025.
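   The abstract contrasts TNP with the low-rank reconstructions of classical decompositions. For orientation only, a minimal sketch of that classical baseline (truncated SVD per channel) follows; the `rank` threshold and the use of plain SVD are illustrative assumptions, not the TNP algorithm:

   ```python
   import numpy as np

   def lowrank_purify(image: np.ndarray, rank: int = 10) -> np.ndarray:
       """Project each channel onto its top-`rank` singular directions.

       Adversarial perturbations are typically low-energy and spread across
       small singular directions, so a truncated reconstruction suppresses
       them while keeping the dominant image structure.
       """
       purified = np.empty_like(image)
       for c in range(image.shape[2]):
           U, s, Vt = np.linalg.svd(image[..., c], full_matrices=False)
           s[rank:] = 0.0                    # drop the small singular values
           purified[..., c] = (U * s) @ Vt   # reassemble the truncated image
       return np.clip(purified, 0.0, 1.0)

   # Example: purify a random 32x32 RGB "image" before classification.
   x_adv = np.random.rand(32, 32, 3)
   x_pure = lowrank_purify(x_adv, rank=10)
   ```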
2. arXiv:2502.16474 [pdf, other] cs.IR (Information Retrieval)
   Unified Semantic and ID Representation Learning for Deep Recommenders
   Authors: Guanyu Lin, Zhigang Hua, Tao Feng, Shuang Yang, Bo Long, Jiaxuan You
   Abstract: Effective recommendation is crucial for large-scale online platforms. Traditional recommendation systems primarily rely on ID tokens to uniquely identify items, which can effectively capture specific item relationships but suffer from issues such as redundancy and poor performance in cold-start scenarios. Recent approaches have explored using semantic tokens as an alternative, yet they face challenges, including item duplication and inconsistent performance gains, leaving the potential advantages of semantic tokens inadequately examined. To address these limitations, we propose a Unified Semantic and ID Representation Learning framework that leverages the complementary strengths of both token types. In our framework, ID tokens capture unique item attributes, while semantic tokens represent shared, transferable characteristics. Additionally, we analyze the role of cosine similarity and Euclidean distance in embedding search, revealing that cosine similarity is more effective in decoupling accumulated embeddings, while Euclidean distance excels in distinguishing unique items. Our framework integrates cosine similarity in earlier layers and Euclidean distance in the final layer to optimize representation learning. Experiments on three benchmark datasets show that our method significantly outperforms state-of-the-art baselines, with improvements ranging from 6% to 17% and a reduction in token size of over 80%. These results demonstrate the effectiveness of combining ID and semantic tokenization to enhance the generalization ability of recommender systems.
   Submitted 23 February, 2025; originally announced February 2025.
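   The embedding-search analysis in this abstract (cosine similarity to decouple accumulated embeddings, Euclidean distance to distinguish unique items) can be made concrete with a small retrieval sketch; the function and table below are hypothetical stand-ins, not the paper's API:

   ```python
   import torch
   import torch.nn.functional as F

   def nearest_items(query: torch.Tensor, table: torch.Tensor,
                     metric: str = "cosine", k: int = 5) -> torch.Tensor:
       """Return indices of the k closest rows of `table` to `query`.

       Cosine similarity ignores magnitude, which helps decouple embeddings
       that have accumulated many co-occurrence updates; Euclidean distance
       keeps magnitude, which helps separate genuinely distinct items.
       """
       if metric == "cosine":
           scores = F.normalize(table, dim=1) @ F.normalize(query, dim=0)
           return scores.topk(k).indices
       dists = torch.cdist(query.unsqueeze(0), table).squeeze(0)
       return dists.topk(k, largest=False).indices

   table = torch.randn(1000, 64)   # hypothetical item-embedding table
   query = torch.randn(64)
   print(nearest_items(query, table, "cosine"))
   print(nearest_items(query, table, "euclidean"))
   ```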
3. arXiv:2502.12135 [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.GR (Graphics)
   MagicArticulate: Make Your 3D Models Articulation-Ready
   Authors: Chaoyue Song, Jianfeng Zhang, Xiu Li, Fan Yang, Yiwen Chen, Zhongcong Xu, Jun Hao Liew, Xiaoyang Guo, Fayao Liu, Jiashi Feng, Guosheng Lin
   Abstract: With the explosive growth of 3D content creation, there is an increasing demand for automatically converting static 3D models into articulation-ready versions that support realistic animation. Traditional approaches rely heavily on manual annotation, which is both time-consuming and labor-intensive. Moreover, the lack of large-scale benchmarks has hindered the development of learning-based solutions. In this work, we present MagicArticulate, an effective framework that automatically transforms static 3D models into articulation-ready assets. Our key contributions are threefold. First, we introduce Articulation-XL, a large-scale benchmark containing over 33k 3D models with high-quality articulation annotations, carefully curated from Objaverse-XL. Second, we propose a novel skeleton generation method that formulates the task as a sequence modeling problem, leveraging an auto-regressive transformer to naturally handle varying numbers of bones or joints within skeletons and their inherent dependencies across different 3D models. Third, we predict skinning weights using a functional diffusion process that incorporates volumetric geodesic distance priors between vertices and joints. Extensive experiments demonstrate that MagicArticulate significantly outperforms existing methods across diverse object categories, achieving high-quality articulation that enables realistic animation. Project page: https://chaoyuesong.github.io/MagicArticulate.
   Submitted 18 February, 2025; v1 submitted 17 February, 2025; originally announced February 2025.
   Comments: Project: https://chaoyuesong.github.io/MagicArticulate

4. arXiv:2502.11515 [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
   SayAnything: Audio-Driven Lip Synchronization with Conditional Video Diffusion
   Authors: Junxian Ma, Shiwen Wang, Jian Yang, Junyi Hu, Jian Liang, Guosheng Lin, Jingbo Chen, Kai Li, Yu Meng
   Abstract: Recent advances in diffusion models have led to significant progress in audio-driven lip synchronization. However, existing methods typically rely on constrained audio-visual alignment priors or multi-stage learning of intermediate representations to force lip motion synthesis. This leads to complex training pipelines and limited motion naturalness. In this paper, we present SayAnything, a conditional video diffusion framework that directly synthesizes lip movements from audio input while preserving speaker identity. Specifically, we propose three specialized modules: an identity preservation module, an audio guidance module, and an editing control module. Our novel design effectively balances different condition signals in the latent space, enabling precise control over appearance, motion, and region-specific generation without requiring additional supervision signals or intermediate representations. Extensive experiments demonstrate that SayAnything generates highly realistic videos with improved lip-teeth coherence, enabling unseen characters to say anything, while effectively generalizing to animated characters.
   Submitted 17 February, 2025; originally announced February 2025.
5. arXiv:2502.11029 [pdf, other] cs.CR (Cryptography and Security)
   HawkEye: Statically and Accurately Profiling the Communication Cost of Models in Multi-party Learning
   Authors: Wenqiang Ruan, Xin Lin, Ruisheng Zhou, Guopeng Lin, Shui Yu, Weili Han
   Abstract: Multi-party computation (MPC) based machine learning, referred to as multi-party learning (MPL), has become an important technology for utilizing data from multiple parties with privacy preservation. In recent years, in order to apply MPL in more practical scenarios, various MPC-friendly models have been proposed to reduce the extraordinary communication overhead of MPL. Within the optimization of MPC-friendly models, a critical element is profiling the communication cost of models. However, current solutions mainly depend on manually established profiles to identify communication bottlenecks, often involving burdensome human effort in a monotonous procedure. In this paper, we propose HawkEye, a static model communication cost profiling framework that enables model designers to obtain the accurate communication cost of models in MPL frameworks without dynamically running secure model training or inference on a specific MPL framework. First, to profile the communication cost of models with complex structures, we propose a static communication cost profiling method based on a prefix structure that records the function calling chain during static analysis. Second, HawkEye employs an automatic differentiation library to assist model designers in profiling the communication cost of models in PyTorch. Finally, we compare the static profiling results of HawkEye against the profiling results obtained by dynamically running secure model training and inference on five popular MPL frameworks: CryptFlow2, CrypTen, Delphi, Cheetah, and SecretFlow-SEMI2K. The experimental results show that HawkEye can accurately profile the model communication cost without dynamic profiling.
   Submitted 16 February, 2025; originally announced February 2025.
   Comments: This paper has been accepted for publication at USENIX Security 2025. Please cite this paper as 'Wenqiang Ruan, Xin Lin, Ruisheng Zhou, Guopeng Lin, Shui Yu, Weili Han, HawkEye: Statically and Accurately Profiling the Communication Cost of Models in Multi-party Learning. In Proceedings of the 34th USENIX Security, August 13-15, 2025, Seattle, WA, USA.'
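   The prefix-structure idea (recording the function calling chain during static analysis) can be illustrated with a toy static profiler; the per-op byte formulas below are placeholder assumptions, since real costs depend on the MPC protocol:

   ```python
   import torch.nn as nn

   # Illustrative per-op cost models (bytes of MPC traffic). The constants
   # are made up for the example; actual costs are protocol-specific.
   COST = {
       nn.Linear: lambda m: 64 * m.in_features * m.out_features,
       nn.ReLU:   lambda m: 0,  # placeholder; comparisons dominate in MPC
   }

   def profile(model: nn.Module, prefix: str = "model") -> dict:
       """Statically walk the module tree, recording cost per call chain."""
       costs = {}
       for name, child in model.named_children():
           chain = f"{prefix}.{name}"           # prefix = calling chain
           fn = COST.get(type(child))
           if fn is not None:
               costs[chain] = fn(child)
           costs.update(profile(child, chain))  # recurse into submodules
       return costs

   net = nn.Sequential(nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10))
   for chain, c in profile(net).items():
       print(f"{chain}: {c} bytes")
   ```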
6. arXiv:2502.09799 [pdf, other] cs.HC (Human-Computer Interaction); cs.AI (Artificial Intelligence); cs.CY (Computers and Society)
   DOI: 10.1145/3706598.3713971
   Co-designing Large Language Model Tools for Project-Based Learning with K12 Educators
   Authors: Prerna Ravi, John Masla, Gisella Kakoti, Grace Lin, Emma Anderson, Matt Taylor, Anastasia Ostrowski, Cynthia Breazeal, Eric Klopfer, Hal Abelson
   Abstract: The emergence of generative AI, particularly large language models (LLMs), has opened the door for student-centered and active learning methods like project-based learning (PBL). However, PBL poses practical implementation challenges for educators around project design and management, assessment, and balancing student guidance with student autonomy. The following research documents a co-design process with interdisciplinary K-12 teachers to explore and address the current PBL challenges they face. Through teacher-driven interviews, collaborative workshops, and iterative design of wireframes, we gathered evidence for ways LLMs can support teachers in implementing high-quality PBL pedagogy by automating routine tasks and enhancing personalized learning. Teachers in the study advocated for supporting their professional growth and augmenting their current roles without replacing them. They also identified affordances and challenges around classroom integration, including resource requirements and constraints, ethical concerns, and potential immediate and long-term impacts. Drawing on these, we propose design guidelines for future deployment of LLM tools in PBL.
   Submitted 13 February, 2025; originally announced February 2025.
   Comments: 25 pages
   Journal ref: CHI Conference on Human Factors in Computing Systems (CHI '25), April 26-May 01, 2025, Yokohama, Japan. ACM, New York, NY, USA

7. arXiv:2502.09039 [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
   Large Images are Gaussians: High-Quality Large Image Representation with Levels of 2D Gaussian Splatting
   Authors: Lingting Zhu, Guying Lin, Jinnan Chen, Xinjie Zhang, Zhenchao Jin, Zhao Wang, Lequan Yu
   Abstract: While Implicit Neural Representations (INRs) have demonstrated significant success in image representation, they are often hindered by large training memory and slow decoding speed. Recently, Gaussian Splatting (GS) has emerged as a promising solution in 3D reconstruction due to its high-quality novel view synthesis and rapid rendering capabilities, positioning it as a valuable tool for a broad spectrum of applications. In particular, a GS-based representation, 2DGS, has shown potential for image fitting. In our work, we present Large Images are Gaussians (LIG), which delves deeper into the application of 2DGS for image representation, addressing the challenge of fitting large images with 2DGS when numerous Gaussian points are required, through two distinct modifications: 1) we adopt a variant of the representation and optimization strategy, facilitating the fitting of a large number of Gaussian points; 2) we propose a Level-of-Gaussian approach for reconstructing both a coarse low-frequency initialization and fine high-frequency details. Consequently, we successfully represent large images as Gaussian points and achieve high-quality large image representation, demonstrating its efficacy across various types of large images. Code is available at https://github.com/HKU-MedAI/LIG.
   Submitted 13 February, 2025; originally announced February 2025.
   Comments: Accepted by 39th Annual AAAI Conference on Artificial Intelligence (AAAI 2025). 10 pages, 4 figures
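   As a rough illustration of fitting an image with 2D Gaussians (the starting point that LIG scales up), here is a minimal isotropic-Gaussian fit by gradient descent; real 2DGS uses anisotropic covariances and LIG adds its Level-of-Gaussian scheme on top, so everything below is a simplified assumption:

   ```python
   import torch

   H = W = 64
   target = torch.rand(H, W)                  # toy stand-in for a real image
   ys, xs = torch.meshgrid(torch.linspace(0, 1, H),
                           torch.linspace(0, 1, W), indexing="ij")

   N = 200                                    # number of 2D Gaussians
   mu = torch.rand(N, 2, requires_grad=True)  # centers in [0,1]^2
   log_s = torch.full((N,), -3.0, requires_grad=True)  # log std devs
   amp = torch.zeros(N, requires_grad=True)   # per-Gaussian intensities

   opt = torch.optim.Adam([mu, log_s, amp], lr=1e-2)
   for step in range(500):
       # Squared distance of every pixel to every Gaussian center: (N, H, W).
       d2 = (xs[None] - mu[:, 0, None, None])**2 + (ys[None] - mu[:, 1, None, None])**2
       s2 = log_s.exp()[:, None, None]**2
       render = (amp[:, None, None] * torch.exp(-d2 / (2 * s2))).sum(0)
       loss = ((render - target)**2).mean()
       opt.zero_grad(); loss.backward(); opt.step()
   print(loss.item())
   ```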
8. arXiv:2502.07259 [pdf, other] astro-ph.IM (Instrumentation and Methods for Astrophysics); astro-ph.SR (Solar and Stellar Astrophysics); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
   DOI: 10.3847/1538-4357/adadff
   Flat U-Net: An Efficient Ultralightweight Model for Solar Filament Segmentation in Full-disk H$\alpha$ Images
   Authors: GaoFei Zhu, GangHua Lin, Xiao Yang, Cheng Zeng
   Abstract: Solar filaments are one of the most prominent features observed on the Sun, and their evolution is closely related to various solar activities, such as flares and coronal mass ejections. Real-time automated identification of solar filaments is the most effective approach to managing large volumes of data. Existing filament-identification models are characterized by large parameter sizes and high computational costs, which limit their future application in highly integrated and intelligent ground-based and space-borne observation devices. Consequently, the design of more lightweight models will facilitate the advancement of intelligent observation equipment. In this study, we introduce Flat U-Net, a novel and highly efficient ultralightweight model that incorporates simplified channel attention (SCA) and channel self-attention (CSA) convolutional blocks for the segmentation of solar filaments in full-disk H$\alpha$ images. Feature information from each network layer is fully extracted to reconstruct interchannel feature representations. Each block effectively optimizes the channel features from the previous layer, significantly reducing parameters. The network architecture presents an elegant flattening, improving its efficiency and simplifying the overall design. Experimental validation demonstrates that a model composed of pure SCAs achieves a precision of approximately 0.93, with dice similarity coefficient (DSC) and recall rates of 0.76 and 0.64, respectively, significantly outperforming the classical U-Net. Introducing a certain number of CSA blocks improves the DSC and recall rates to 0.82 and 0.74, respectively, which demonstrates a pronounced advantage, particularly concerning model weight size and detection effectiveness. The data set, models, and code are available as open-source resources.
   Submitted 10 February, 2025; originally announced February 2025.
   Comments: 15 pages, 5 figures, 3 tables, accepted for publication in ApJ
   Journal ref: ApJ 980, 176 (2025)
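   The SCA/CSA blocks are described only at a high level here; a generic squeeze-and-excitation-style channel-attention block, a plausible stand-in rather than the paper's exact design, might look like this:

   ```python
   import torch
   import torch.nn as nn

   class ChannelAttentionBlock(nn.Module):
       """Conv block gated by channel attention (SE-style stand-in).

       Global average pooling summarizes each channel, a 1x1 bottleneck
       produces per-channel gates, and the gates re-weight the features.
       """
       def __init__(self, in_ch: int, out_ch: int, reduction: int = 4):
           super().__init__()
           self.conv = nn.Sequential(
               nn.Conv2d(in_ch, out_ch, 3, padding=1),
               nn.BatchNorm2d(out_ch),
               nn.ReLU(inplace=True),
           )
           self.gate = nn.Sequential(
               nn.AdaptiveAvgPool2d(1),
               nn.Conv2d(out_ch, out_ch // reduction, 1), nn.ReLU(inplace=True),
               nn.Conv2d(out_ch // reduction, out_ch, 1), nn.Sigmoid(),
           )

       def forward(self, x: torch.Tensor) -> torch.Tensor:
           y = self.conv(x)
           return y * self.gate(y)   # channel-wise re-weighting

   x = torch.randn(1, 1, 256, 256)   # e.g., a single-channel H-alpha patch
   print(ChannelAttentionBlock(1, 16)(x).shape)  # torch.Size([1, 16, 256, 256])
   ```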
9. arXiv:2502.04728 [pdf, other] cs.AI (Artificial Intelligence)
   Generating Symbolic World Models via Test-time Scaling of Large Language Models
   Authors: Zhouliang Yu, Yuhuan Yuan, Tim Z. Xiao, Fuxiang Frank Xia, Jie Fu, Ge Zhang, Ge Lin, Weiyang Liu
   Abstract: Solving complex planning problems requires Large Language Models (LLMs) to explicitly model state transitions to avoid rule violations, comply with constraints, and ensure optimality, a task hindered by the inherent ambiguity of natural language. To overcome such ambiguity, the Planning Domain Definition Language (PDDL) is leveraged as a planning abstraction that enables precise and formal state descriptions. With PDDL, we can generate a symbolic world model where classic search algorithms, such as A*, can be seamlessly applied to find optimal plans. However, directly generating PDDL domains with current LLMs remains an open challenge due to the lack of PDDL training data. To address this challenge, we propose to scale up the test-time computation of LLMs to enhance their PDDL reasoning capabilities, thereby enabling the generation of high-quality PDDL domains. Specifically, we introduce a simple yet effective algorithm that first employs a Best-of-N sampling approach to improve the quality of the initial solution and then refines the solution in a fine-grained manner with verbalized machine learning. Our method outperforms o1-mini by a considerable margin in the generation of PDDL domains, achieving over 50% success rate on two tasks (i.e., generating PDDL domains from natural language descriptions or from PDDL problems), without requiring additional training. By taking advantage of PDDL as a state abstraction, our method is able to outperform current state-of-the-art methods on almost all competition-level planning tasks.
   Submitted 7 February, 2025; originally announced February 2025.
   Comments: Technical Report v1 (32 pages, 6 figures)
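   The test-time scaling recipe (Best-of-N sampling followed by fine-grained refinement from verbalized feedback) reduces to two small loops; `generate`, `score`, `critique`, and `revise` are assumed interfaces, not a specific LLM API:

   ```python
   def best_of_n(prompt: str, generate, score, n: int = 8) -> str:
       """Sample n candidate PDDL domains and keep the best-scoring one.

       `generate` is any LLM sampling call; `score` is any verifier
       (e.g., the fraction of held-out plans a PDDL validator accepts).
       """
       candidates = [generate(prompt) for _ in range(n)]
       return max(candidates, key=score)

   def refine(draft: str, critique, revise, steps: int = 3) -> str:
       """Iteratively revise the draft based on verbalized feedback."""
       for _ in range(steps):
           feedback = critique(draft)        # natural-language error report
           if not feedback:
               break                          # verifier found nothing to fix
           draft = revise(draft, feedback)   # ask the LLM to patch the draft
       return draft
   ```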
Specifically, the agent extracts the object movement and camera motion described in the text and converts them into object tra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03207v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03207v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03207v1-abstract-full" style="display: none;"> We propose MotionAgent, enabling fine-grained motion control for text-guided image-to-video generation. The key technique is the motion field agent that converts motion information in text prompts into explicit motion fields, providing flexible and precise motion guidance. Specifically, the agent extracts the object movement and camera motion described in the text and converts them into object trajectories and camera extrinsics, respectively. An analytical optical flow composition module integrates these motion representations in 3D space and projects them into a unified optical flow. An optical flow adapter takes the flow to control the base image-to-video diffusion model for generating fine-grained controlled videos. The significant improvement in the Video-Text Camera Motion metrics on VBench indicates that our method achieves precise control over camera motion. We construct a subset of VBench to evaluate the alignment of motion information in the text and the generated video, outperforming other advanced models on motion generation accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03207v1-abstract-full').style.display = 'none'; document.getElementById('2502.03207v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01061">arXiv:2502.01061</a> <span> [<a href="https://arxiv.org/pdf/2502.01061">pdf</a>, <a href="https://arxiv.org/format/2502.01061">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OmniHuman-1: Rethinking the Scaling-Up of One-Stage Conditioned Human Animation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+G">Gaojie Lin</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jianwen Jiang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiaqi Yang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zerong Zheng</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C">Chao Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01061v2-abstract-short" style="display: inline;"> End-to-end human animation, such as audio-driven talking human generation, has undergone notable advancements in the recent few years. However, existing methods still struggle to scale up as large general video generation models, limiting their potential in real applications. 
In this paper, we propose OmniHuman, a Diffusion Transformer-based framework that scales up data by mixing motion-related c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01061v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01061v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01061v2-abstract-full" style="display: none;"> End-to-end human animation, such as audio-driven talking human generation, has undergone notable advancements in the recent few years. However, existing methods still struggle to scale up as large general video generation models, limiting their potential in real applications. In this paper, we propose OmniHuman, a Diffusion Transformer-based framework that scales up data by mixing motion-related conditions into the training phase. To this end, we introduce two training principles for these mixed conditions, along with the corresponding model architecture and inference strategy. These designs enable OmniHuman to fully leverage data-driven motion generation, ultimately achieving highly realistic human video generation. More importantly, OmniHuman supports various portrait contents (face close-up, portrait, half-body, full-body), supports both talking and singing, handles human-object interactions and challenging body poses, and accommodates different image styles. Compared to existing end-to-end audio-driven methods, OmniHuman not only produces more realistic videos, but also offers greater flexibility in inputs. It also supports multiple driving modalities (audio-driven, video-driven and combined driving signals). Video samples are provided on the ttfamily project page (https://omnihuman-lab.github.io) <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01061v2-abstract-full').style.display = 'none'; document.getElementById('2502.01061v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://omnihuman-lab.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00657">arXiv:2502.00657</a> <span> [<a href="https://arxiv.org/pdf/2502.00657">pdf</a>, <a href="https://arxiv.org/format/2502.00657">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> LLM Safety Alignment is Divergence Estimation in Disguise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Haldar%2C+R">Rajdeep Haldar</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziyi Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Q">Qifan Song</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+Y">Yue Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00657v1-abstract-short" style="display: inline;"> We propose a theoretical framework demonstrating that popular Large Language Model (LLM) alignment methods, including Reinforcement Learning from Human Feedback (RLHF) and alternatives, fundamentally function as divergence estimators between aligned (preferred or safe) and unaligned (less-preferred or harmful) distributions. This explains the separation phenomenon between safe and harmful prompts… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00657v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00657v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00657v1-abstract-full" style="display: none;"> We propose a theoretical framework demonstrating that popular Large Language Model (LLM) alignment methods, including Reinforcement Learning from Human Feedback (RLHF) and alternatives, fundamentally function as divergence estimators between aligned (preferred or safe) and unaligned (less-preferred or harmful) distributions. This explains the separation phenomenon between safe and harmful prompts in the model hidden representation after alignment. Inspired by the theoretical results, we identify that some alignment methods are better than others in terms of separation and, introduce a new method, KLDO, and further demonstrate the implication of our theories. We advocate for compliance-refusal datasets over preference datasets to enhance safety alignment, supported by both theoretical reasoning and empirical evidence. Additionally, to quantify safety separation, we leverage a distance metric in the representation space and statistically validate its efficacy as a statistical significant indicator of LLM resilience against jailbreak attacks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00657v1-abstract-full').style.display = 'none'; document.getElementById('2502.00657v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00550">arXiv:2502.00550</a> <span> [<a href="https://arxiv.org/pdf/2502.00550">pdf</a>, <a href="https://arxiv.org/format/2502.00550">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Muti-Fidelity Prediction and Uncertainty Quantification with Laplace Neural Operators for Parametric Partial Differential Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Haoyang Zheng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00550v1-abstract-short" style="display: inline;"> Laplace Neural Operators (LNOs) have recently emerged as a promising approach in scientific machine learning due to the ability to learn nonlinear maps between functional spaces. However, this framework often requires substantial amounts of high-fidelity (HF) training data, which is often prohibitively expensive to acquire. To address this, we propose multi-fidelity Laplace Neural Operators (MF-LN… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00550v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00550v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00550v1-abstract-full" style="display: none;"> Laplace Neural Operators (LNOs) have recently emerged as a promising approach in scientific machine learning due to the ability to learn nonlinear maps between functional spaces. However, this framework often requires substantial amounts of high-fidelity (HF) training data, which is often prohibitively expensive to acquire. To address this, we propose multi-fidelity Laplace Neural Operators (MF-LNOs), which combine a low-fidelity (LF) base model with parallel linear/nonlinear HF correctors and dynamic inter-fidelity weighting. This allows us to exploit correlations between LF and HF datasets and achieve accurate inference of quantities of interest even with sparse HF data. We further incorporate a modified replica exchange stochastic gradient Langevin algorithm, which enables a more effective posterior distribution estimation and uncertainty quantification in model predictions. 
arXiv:2501.17323 (https://arxiv.org/abs/2501.17323) [pdf, other] cs.LG, stat.ML
Exploring Non-Convex Discrete Energy Landscapes: A Langevin-Like Sampler with Replica Exchange
Authors: Haoyang Zheng, Ruqi Zhang, Guang Lin
Abstract: Gradient-based Discrete Samplers (GDSs) are effective for sampling discrete energy landscapes. However, they often stagnate in complex, non-convex settings. To improve exploration, we introduce the Discrete Replica EXchangE Langevin (DREXEL) sampler and its variant with Adjusted Metropolis (DREAM). These samplers use two GDSs at different temperatures and step sizes: one focuses on local exploitation, while the other explores broader energy landscapes. When energy differences are significant, sample swaps occur between the two chains, governed by a mechanism tailored for discrete sampling to ensure detailed balance. Theoretically, we prove that both DREXEL and DREAM converge asymptotically to the target energy and exhibit faster mixing than a single GDS. Experiments further confirm their efficiency in exploring non-convex discrete energy landscapes.
Submitted 28 January, 2025; originally announced January 2025.
Comments: 7 figures, 23 pages
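The swap mechanism is what distinguishes these samplers, and the abstract does not give its exact form; the sketch below shows the standard replica-exchange Metropolis test between a cold and a hot chain, which the paper adapts for discrete sampling (the energy function is a toy stand-in):

import math, random

def swap_accepted(E_cold, E_hot, beta_cold, beta_hot):
    """Standard replica-exchange test: accept the swap with probability
    min(1, exp((beta_cold - beta_hot) * (E_cold - E_hot)))."""
    log_alpha = (beta_cold - beta_hot) * (E_cold - E_hot)
    return random.random() < math.exp(min(0.0, log_alpha))

# toy usage with a 1-D discrete energy U(x) = (x - 3)^2 on the integers
U = lambda x: (x - 3) ** 2
x_cold, x_hot = 0, 7
if swap_accepted(U(x_cold), U(x_hot), beta_cold=1.0, beta_hot=0.2):
    x_cold, x_hot = x_hot, x_cold   # exchange states between the chains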
arXiv:2501.15616 (https://arxiv.org/abs/2501.15616) [pdf, other] cs.CV
IPVTON: Image-based 3D Virtual Try-on with Image Prompt Adapter
Authors: Xiaojing Zhong, Zhonghua Wu, Xiaofeng Yang, Guosheng Lin, Qingyao Wu
Abstract: Given a pair of images depicting a person and a garment separately, image-based 3D virtual try-on methods aim to reconstruct a 3D human model that realistically portrays the person wearing the desired garment. In this paper, we present IPVTON, a novel image-based 3D virtual try-on framework. IPVTON employs score distillation sampling with image prompts to optimize a hybrid 3D human representation, integrating target garment features into diffusion priors through an image prompt adapter. To avoid interference with non-target areas, we leverage mask-guided image prompt embeddings to focus the image features on the try-on regions. Moreover, we impose geometric constraints on the 3D model with a pseudo silhouette generated by ControlNet, ensuring that the clothed 3D human model retains the shape of the source identity while accurately wearing the target garments. Extensive qualitative and quantitative experiments demonstrate that IPVTON outperforms previous methods in image-based 3D virtual try-on tasks, excelling in both geometry and texture.
Submitted 26 January, 2025; originally announced January 2025.
Journal ref: AAAI 2025
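Score distillation sampling, which IPVTON builds on, optimizes a 3D representation by treating the diffusion prior's noise-prediction error as a gradient on the rendered image. A minimal sketch of that update, with the noise schedule and weighting omitted and a placeholder denoiser standing in for the actual diffusion model:

import torch

def sds_grad(render, denoiser):
    """Usual SDS gradient: (eps_pred - eps), treated as a constant gradient
    on the rendered image; no backprop through the denoiser."""
    t = torch.randint(1, 1000, (1,))          # random diffusion step
    eps = torch.randn_like(render)            # injected noise
    noisy = render + eps                      # schematic forward noising
    with torch.no_grad():
        eps_pred = denoiser(noisy, t)         # the diffusion prior's estimate
    return eps_pred - eps

# toy usage: push a learnable "rendering" toward a stand-in prior
img = torch.zeros(1, 3, 8, 8, requires_grad=True)
denoiser = lambda x, t: 0.5 * x               # placeholder denoiser
img.backward(gradient=sds_grad(img, denoiser))
print(img.grad.shape)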
arXiv:2501.13969 (https://arxiv.org/abs/2501.13969) [pdf, other] cs.CV, cs.GR, cs.LG
InsTex: Indoor Scenes Stylized Texture Synthesis
Authors: Yunfan Zhang, Zhiwei Xiong, Zhiqi Shen, Guosheng Lin, Hao Wang, Nicolas Vun
Abstract: Generating high-quality textures for 3D scenes is crucial for applications in interior design, gaming, and augmented/virtual reality (AR/VR). Although recent advancements in 3D generative models have enhanced content creation, significant challenges remain in achieving broad generalization and maintaining style consistency across multiple viewpoints. Current methods, such as 2D diffusion models adapted for 3D texturing, suffer from lengthy processing times and visual artifacts, while approaches driven by 3D data often fail to generalize effectively. To overcome these challenges, we introduce InsTex, a two-stage architecture designed to generate high-quality, style-consistent textures for 3D indoor scenes. InsTex utilizes depth-to-image diffusion priors in a coarse-to-fine pipeline, first generating multi-view images with a pre-trained 2D diffusion model and subsequently refining the textures for consistency. Our method supports both textual and visual prompts, achieving state-of-the-art results in visual quality and quantitative metrics, and demonstrates its effectiveness across various 3D texturing applications.
Submitted 22 January, 2025; originally announced January 2025.
arXiv:2501.13335 (https://arxiv.org/abs/2501.13335) [pdf, other] cs.CV
Deblur-Avatar: Animatable Avatars from Motion-Blurred Monocular Videos
Authors: Xianrui Luo, Juewen Peng, Zhongang Cai, Lei Yang, Fan Yang, Zhiguo Cao, Guosheng Lin
Abstract: We introduce Deblur-Avatar, a novel framework for modeling high-fidelity, animatable 3D human avatars from motion-blurred monocular video inputs. Motion blur is prevalent in real-world dynamic video capture, especially due to human movements in 3D human avatar modeling. Existing methods either (1) assume sharp image inputs, failing to address the detail loss introduced by motion blur, or (2) mainly consider blur by camera movements, neglecting the human motion blur which is more common in animatable avatars. Our proposed approach integrates a human movement-based motion blur model into 3D Gaussian Splatting (3DGS). By explicitly modeling human motion trajectories during exposure time, we jointly optimize the trajectories and 3D Gaussians to reconstruct sharp, high-quality human avatars. We employ a pose-dependent fusion mechanism to distinguish moving body regions, optimizing both blurred and sharp areas effectively. Extensive experiments on synthetic and real-world datasets demonstrate that Deblur-Avatar significantly outperforms existing methods in rendering quality and quantitative metrics, producing sharp avatar reconstructions and enabling real-time rendering under challenging motion blur conditions.
Submitted 22 January, 2025; originally announced January 2025.
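A minimal sketch of the blur formation model the abstract implies: a motion-blurred frame is approximated as the average of sharp renders along a pose trajectory within the exposure window, so the trajectory and the scene can be optimized jointly against blurred observations. The linear pose interpolation and the toy renderer are assumptions, not the paper's exact model.

import torch

def blurred_render(render_fn, pose_start, pose_end, n_samples=8):
    """Average sharp renders over poses interpolated across the exposure
    time; optimizing poses and scene jointly sharpens the avatar."""
    frames = []
    for s in torch.linspace(0.0, 1.0, n_samples):
        pose = (1 - s) * pose_start + s * pose_end  # linear stand-in interpolation
        frames.append(render_fn(pose))
    return torch.stack(frames).mean(dim=0)

# toy usage: a "renderer" that maps a pose vector to an image
render_fn = lambda pose: pose.sum() * torch.ones(3, 4, 4)
img = blurred_render(render_fn, torch.zeros(2), torch.ones(2))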
arXiv:2501.02241 (https://arxiv.org/abs/2501.02241) [pdf, other] cs.LG, cs.AI
Interpretable Load Forecasting via Representation Learning of Geo-distributed Meteorological Factors
Authors: Yangze Zhou, Guoxin Lin, Gonghao Zhang, Yi Wang
Abstract: Meteorological factors (MF) are crucial in day-ahead load forecasting as they significantly influence the electricity consumption behaviors of consumers. Numerous studies have incorporated MF into the load forecasting model to achieve higher accuracy. Selecting MF from one representative location or the averaged MF as the inputs of the forecasting model is a common practice. However, the difference in MF collected in various locations within a region may be significant, which poses a challenge in selecting the appropriate MF from numerous locations. A representation learning framework is proposed to extract geo-distributed MF while considering their spatial relationships. In addition, this paper employs the Shapley value in the graph-based model to reveal connections between MF collected in different locations and loads. To reduce the computational complexity of calculating the Shapley value, an acceleration method is adopted based on Monte Carlo sampling and weighted linear regression. Experiments on two real-world datasets demonstrate that the proposed method improves the day-ahead forecasting accuracy, especially in extreme scenarios such as the "accumulation temperature effect" in summer and "sudden temperature change" in winter. We also find a significant correlation between the importance of MF in different locations and the corresponding area's GDP and mainstay industry.
Submitted 4 January, 2025; originally announced January 2025.
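Monte Carlo sampling plus weighted linear regression for Shapley values is the KernelSHAP construction; a minimal sketch under that reading (the model, kernel weighting details, and sample count are simplified):

import numpy as np
from math import comb

def kernel_shap(model, x, baseline, n_samples=2048, seed=0):
    """Sample feature coalitions, score the model on masked inputs, and
    fit a weighted least-squares regression on the coalition masks."""
    rng = np.random.default_rng(seed)
    d = len(x)
    Z = rng.integers(0, 2, size=(n_samples, d))        # random coalitions
    X = np.where(Z == 1, x, baseline)                  # replace absent features
    y = np.array([model(row) for row in X])
    s = Z.sum(axis=1)
    kernel = (d - 1) / (np.array([comb(d, int(k)) for k in s])
                        * np.maximum(s, 1) * np.maximum(d - s, 1))
    w = np.where((s > 0) & (s < d), kernel, 1e6)       # pin all-on/all-off
    A = np.hstack([np.ones((n_samples, 1)), Z])
    sw = np.sqrt(w)
    coef, *_ = np.linalg.lstsq(A * sw[:, None], y * sw, rcond=None)
    return coef[1:]                                    # per-feature attributions

phi = kernel_shap(lambda v: 2 * v[0] + v[1], x=np.ones(3), baseline=np.zeros(3))
print(np.round(phi, 2))                                # approx. [2., 1., 0.]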
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02241v1-abstract-full').style.display = 'none'; document.getElementById('2501.02241v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02182">arXiv:2501.02182</a> <span> [<a href="https://arxiv.org/pdf/2501.02182">pdf</a>, <a href="https://arxiv.org/format/2501.02182">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AdaMixup: A Dynamic Defense Framework for Membership Inference Attack Mitigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Ying Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiajing Chen</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+Y">Yijie Weng</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+C">ChiaHua Chang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dezhi Yu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guanbiao Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02182v1-abstract-short" style="display: inline;"> Membership inference attacks have emerged as a significant privacy concern in the training of deep learning models, where attackers can infer whether a data point was part of the training set based on the model's outputs. To address this challenge, we propose a novel defense mechanism, AdaMixup. AdaMixup employs adaptive mixup techniques to enhance the model's robustness against membership inferen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02182v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02182v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02182v1-abstract-full" style="display: none;"> Membership inference attacks have emerged as a significant privacy concern in the training of deep learning models, where attackers can infer whether a data point was part of the training set based on the model's outputs. To address this challenge, we propose a novel defense mechanism, AdaMixup. AdaMixup employs adaptive mixup techniques to enhance the model's robustness against membership inference attacks by dynamically adjusting the mixup strategy during training. This method not only improves the model's privacy protection but also maintains high performance. Experimental results across multiple datasets demonstrate that AdaMixup significantly reduces the risk of membership inference attacks while achieving a favorable trade-off between defensive efficiency and model accuracy. This research provides an effective solution for data privacy protection and lays the groundwork for future advancements in mixup training methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02182v1-abstract-full').style.display = 'none'; document.getElementById('2501.02182v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20227">arXiv:2412.20227</a> <span> [<a href="https://arxiv.org/pdf/2412.20227">pdf</a>, <a href="https://arxiv.org/format/2412.20227">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLM Reasoning Engine: Specialized Training for Enhanced Mathematical Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shuguang Chen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20227v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have shown remarkable performance in various natural language processing tasks but face challenges in mathematical reasoning, where complex problem-solving requires both linguistic understanding and mathematical reasoning skills. Existing approaches to address this challenge often rely on ensemble methods and suffer from the problem of data scarcity in target domains.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20227v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20227v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20227v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have shown remarkable performance in various natural language processing tasks but face challenges in mathematical reasoning, where complex problem-solving requires both linguistic understanding and mathematical reasoning skills. Existing approaches to address this challenge often rely on ensemble methods and suffer from the problem of data scarcity in target domains. In this work, we present a novel method to enhance LLMs' capabilities in mathematical reasoning tasks. Motivated by the need to bridge this gap, our approach incorporates a question paraphrase strategy, which aims at diversifying the linguistic forms of mathematical questions to improve generalization. Additionally, specialized training objectives are employed to guide the model's learning process, focusing on enhancing its understanding of mathematical concepts and reasoning processes. We conduct experiments on four datasets using different LLMs, and demonstrate the effectiveness of our approach in improving LLMs' performance on mathematical reasoning tasks. 
arXiv:2412.16915 (https://arxiv.org/abs/2412.16915) [pdf, other] cs.CV, cs.AI, cs.GR, cs.SD, eess.AS
FADA: Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation
Authors: Tianyun Zhong, Chao Liang, Jianwen Jiang, Gaojie Lin, Jiaqi Yang, Zhou Zhao
Abstract: Diffusion-based audio-driven talking avatar methods have recently gained attention for their high-fidelity, vivid, and expressive results. However, their slow inference speed limits practical applications. Despite the development of various distillation techniques for diffusion models, we found that naive diffusion distillation methods do not yield satisfactory results. Distilled models exhibit reduced robustness with open-set input images and a decreased correlation between audio and video compared to teacher models, undermining the advantages of diffusion models. To address this, we propose FADA (Fast Diffusion Avatar Synthesis with Mixed-Supervised Multi-CFG Distillation). We first design a mixed-supervised loss to leverage data of varying quality and enhance the overall model capability as well as robustness. Additionally, we propose a multi-CFG distillation with learnable tokens to utilize the correlation between audio and reference image conditions, reducing the threefold inference runs caused by multi-CFG with acceptable quality degradation. Extensive experiments across multiple datasets show that FADA generates vivid videos comparable to recent diffusion model-based methods while achieving an NFE speedup of 4.17-12.5 times. Demos are available at our webpage (http://fadavatar.github.io).
Submitted 22 December, 2024; originally announced December 2024.
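The "threefold inference runs" refer to classifier-free guidance over two conditions (audio and reference image), which needs three denoiser calls per step. A minimal sketch of such a multi-CFG combination, with placeholder guidance scales and denoiser; the distillation described above aims to fold these three calls into one:

import torch

def multi_cfg(denoise, x, t, audio, ref, s_audio=3.0, s_ref=2.0):
    """Three denoiser calls per step: unconditional, +reference, +both."""
    eps_uncond = denoise(x, t, audio=None, ref=None)
    eps_ref = denoise(x, t, audio=None, ref=ref)
    eps_full = denoise(x, t, audio=audio, ref=ref)
    return (eps_uncond
            + s_ref * (eps_ref - eps_uncond)    # pull toward the reference image
            + s_audio * (eps_full - eps_ref))   # then toward the audio condition

# toy usage with a stand-in denoiser
denoise = lambda x, t, audio, ref: 0.1 * x
out = multi_cfg(denoise, torch.randn(1, 4), t=10,
                audio=torch.randn(8), ref=torch.randn(4))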
arXiv:2412.11538 (https://arxiv.org/abs/2412.11538) [pdf, other] cs.CL, cs.AI, eess.AS
MERaLiON-SpeechEncoder: Towards a Speech Foundation Model for Singapore and Beyond
Authors: Muhammad Huzaifah, Geyu Lin, Tianchi Liu, Hardik B. Sailor, Kye Min Tan, Tarun K. Vangani, Qiongqiong Wang, Jeremy H. M. Wong, Nancy F. Chen, Ai Ti Aw
Abstract: This technical report describes the MERaLiON-SpeechEncoder, a foundation model designed to support a wide range of downstream speech applications. Developed as part of Singapore's National Multimodal Large Language Model Programme, the MERaLiON-SpeechEncoder is tailored to address the speech processing needs in Singapore and the surrounding Southeast Asian region. The model currently supports mainly English, including the variety spoken in Singapore. We are actively expanding our datasets to gradually cover other languages in subsequent releases. The MERaLiON-SpeechEncoder was pre-trained from scratch on 200,000 hours of unlabelled speech data using a self-supervised learning approach based on masked language modelling. We describe our training procedure and hyperparameter tuning experiments in detail below. Our evaluation demonstrates improvements to spontaneous and Singapore speech benchmarks for speech recognition, while remaining competitive with other state-of-the-art speech encoders across ten other speech tasks. We commit to releasing our model, supporting broader research endeavours, both in Singapore and beyond.
Submitted 20 December, 2024; v1 submitted 16 December, 2024; originally announced December 2024.
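A minimal sketch of a masked-language-modelling objective on speech frames, the pre-training approach named above: mask contiguous spans and classify discrete targets at the masked positions. The span length, masking rate, codebook targets, and encoder are illustrative assumptions, not the report's configuration.

import torch
import torch.nn as nn
import torch.nn.functional as F

def mask_spans(frames, span=10, p=0.065):
    """Pick random span starts and zero out `span` consecutive frames,
    returning the corrupted input and a mask of hidden positions."""
    B, T, _ = frames.shape
    mask = torch.zeros(B, T, dtype=torch.bool)
    starts = torch.rand(B, T) < p
    for b, t in starts.nonzero():
        mask[b, t:t + span] = True
    return frames.masked_fill(mask.unsqueeze(-1), 0.0), mask

B, T, D, V = 2, 100, 80, 512                    # batch, frames, dims, codebook size
frames = torch.randn(B, T, D)
targets = torch.randint(0, V, (B, T))           # stand-in discrete frame labels
encoder = nn.Sequential(nn.Linear(D, 256), nn.ReLU(), nn.Linear(256, V))
corrupted, mask = mask_spans(frames)
logits = encoder(corrupted)
loss = F.cross_entropy(logits[mask], targets[mask])  # predict only masked frames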
arXiv:2412.10411 (https://arxiv.org/abs/2412.10411) [pdf, other] q-bio.QM, cs.AI
Pre-trained protein language model for codon optimization
Authors: Shashank Pathak, Guohui Lin
Abstract: Motivation: Codon optimization of Open Reading Frame (ORF) sequences is essential for enhancing mRNA stability and expression in applications like mRNA vaccines, where codon choice can significantly impact protein yield, which in turn directly impacts immune strength. In this work, we investigate the use of a pre-trained protein language model (PPLM) to obtain a rich representation of amino acids that can be utilized for codon optimization. This leaves us with a simpler fine-tuning task over the PPLM for optimizing ORF sequences. Results: The ORFs generated by our proposed models outperformed their natural counterparts encoding the same proteins on computational metrics for stability and expression. They also demonstrated enhanced performance against the benchmark ORFs used in mRNA vaccines for the SARS-CoV-2 viral spike protein and the varicella-zoster virus (VZV). These results highlight the potential of adapting PPLMs for designing ORFs tailored to encode target antigens in mRNA vaccines.
Submitted 7 December, 2024; originally announced December 2024.
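A minimal sketch of the fine-tuning idea: a small head over per-residue embeddings from a frozen pre-trained protein language model scores the 64 codons, with a synonymous-codon mask so only codons encoding the given amino acid are selectable. All sizes and names here are hypothetical; the paper's architecture is not specified in the abstract.

import torch
import torch.nn as nn

NUM_CODONS = 64  # all triplets; non-synonymous choices are masked out

class CodonHead(nn.Module):
    """Map frozen PPLM residue embeddings to a distribution over codons."""
    def __init__(self, emb_dim=1024):
        super().__init__()
        self.proj = nn.Sequential(
            nn.Linear(emb_dim, 256), nn.GELU(), nn.Linear(256, NUM_CODONS))

    def forward(self, residue_emb, synonym_mask):
        logits = self.proj(residue_emb)
        return logits.masked_fill(~synonym_mask, float("-inf"))  # legal codons only

# toy usage: one protein of length 5 with stand-in embeddings and masks
emb = torch.randn(1, 5, 1024)                  # would come from the frozen PPLM
mask = torch.zeros(1, 5, NUM_CODONS, dtype=torch.bool)
mask[..., :4] = True                           # pretend 4 codons are synonymous
codons = CodonHead()(emb, mask).argmax(dim=-1)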
arXiv:2412.10061 (https://arxiv.org/abs/2412.10061) [pdf, other] cs.CV, cs.GR
Quaffure: Real-Time Quasi-Static Neural Hair Simulation
Authors: Tuur Stuyck, Gene Wei-Chin Lin, Egor Larionov, Hsiao-yu Chen, Aljaz Bozic, Nikolaos Sarafianos, Doug Roble
Abstract: Realistic hair motion is crucial for high-quality avatars, but it is often limited by the computational resources available for real-time applications. To address this challenge, we propose a novel neural approach to predict physically plausible hair deformations that generalizes to various body poses, shapes, and hairstyles. Our model is trained using a self-supervised loss, eliminating the need for expensive data generation and storage. We demonstrate our method's effectiveness through numerous results across a wide range of pose and shape variations, showcasing its robust generalization capabilities and temporally smooth results. Our approach is highly suitable for real-time applications, with an inference time of only a few milliseconds on consumer hardware and the ability to scale to predicting the drape of 1000 grooms in 0.3 seconds.
Submitted 13 December, 2024; originally announced December 2024.
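The self-supervised loss is not detailed in the abstract; one standard way to train quasi-static deformation without ground-truth simulation data is to minimize a potential energy of the predicted shape, e.g. an elastic term plus gravity, as in the speculative sketch below (the energy terms and toy network are assumptions, not Quaffure's):

import torch
import torch.nn as nn

def potential_energy(pred_pos, rest_pos, masses, k=50.0):
    """Generic quasi-static objective: spring energy toward the rest shape
    between neighboring points, plus gravitational potential m*g*h."""
    stretch = pred_pos[:, 1:] - pred_pos[:, :-1]
    rest = rest_pos[:, 1:] - rest_pos[:, :-1]
    elastic = 0.5 * k * ((stretch.norm(dim=-1) - rest.norm(dim=-1)) ** 2).sum()
    gravity = (masses * 9.81 * pred_pos[..., 2]).sum()
    return elastic + gravity

# toy usage: a network predicts hair point offsets from a pose code
net = nn.Linear(8, 30)                          # 10 points * 3 coordinates
pose = torch.randn(4, 8)
rest = torch.zeros(4, 10, 3); rest[..., 2] = torch.linspace(0, -1, 10)
pred = rest + net(pose).view(4, 10, 3)
loss = potential_energy(pred, rest, masses=torch.full((4, 10), 0.01))
loss.backward()                                 # no simulated targets needed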
arXiv:2412.07811 (https://arxiv.org/abs/2412.07811) [pdf, other] cs.LG
Adversarial Autoencoders in Operator Learning
Authors: Dustin Enyeart, Guang Lin
Abstract: DeepONets and Koopman autoencoders are two prevalent neural operator architectures. These architectures are autoencoders. An adversarial addition to an autoencoder has improved the performance of autoencoders in various areas of machine learning. In this paper, the use of an adversarial addition for these two neural operator architectures is studied.
Submitted 9 December, 2024; originally announced December 2024.
Comments: arXiv admin note: substantial text overlap with arXiv:2412.06686, arXiv:2412.04578
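A minimal sketch of the generic "adversarial addition" in the adversarial-autoencoder sense: a discriminator distinguishes encoder latents from prior samples, and the autoencoder is trained to reconstruct while fooling it. The stand-in linear encoder/decoder here would be replaced by a DeepONet or Koopman autoencoder.

import torch
import torch.nn as nn
import torch.nn.functional as F

enc = nn.Linear(32, 4)                   # stand-in encoder
dec = nn.Linear(4, 32)                   # stand-in decoder
disc = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 1))
opt_ae = torch.optim.Adam([*enc.parameters(), *dec.parameters()], lr=1e-3)
opt_d = torch.optim.Adam(disc.parameters(), lr=1e-3)

x = torch.randn(64, 32)
for _ in range(100):
    z = enc(x)
    # discriminator step: real = prior samples, fake = encoder latents
    d_loss = (F.binary_cross_entropy_with_logits(disc(torch.randn_like(z)), torch.ones(64, 1))
              + F.binary_cross_entropy_with_logits(disc(z.detach()), torch.zeros(64, 1)))
    opt_d.zero_grad(); d_loss.backward(); opt_d.step()
    # autoencoder step: reconstruct x and fool the discriminator
    g_loss = (F.mse_loss(dec(z), x)
              + F.binary_cross_entropy_with_logits(disc(z), torch.ones(64, 1)))
    opt_ae.zero_grad(); g_loss.backward(); opt_ae.step()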
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2412.06686, arXiv:2412.04578</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06686">arXiv:2412.06686</a> <span> [<a href="https://arxiv.org/pdf/2412.06686">pdf</a>, <a href="https://arxiv.org/format/2412.06686">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Some Best Practices in Operator Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Enyeart%2C+D">Dustin Enyeart</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06686v1-abstract-short" style="display: inline;"> Hyperparameters searches are computationally expensive. This paper studies some general choices of hyperparameters and training methods specifically for operator learning. It considers the architectures DeepONets, Fourier neural operators and Koopman autoencoders for several differential equations to find robust trends. Some options considered are activation functions, dropout and stochastic weigh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06686v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06686v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06686v1-abstract-full" style="display: none;"> Hyperparameters searches are computationally expensive. This paper studies some general choices of hyperparameters and training methods specifically for operator learning. It considers the architectures DeepONets, Fourier neural operators and Koopman autoencoders for several differential equations to find robust trends. Some options considered are activation functions, dropout and stochastic weight averaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06686v1-abstract-full').style.display = 'none'; document.getElementById('2412.06686v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2412.04578</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.04578">arXiv:2412.04578</a> <span> [<a href="https://arxiv.org/pdf/2412.04578">pdf</a>, <a href="https://arxiv.org/format/2412.04578">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Loss Terms and Operator Forms of Koopman Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Enyeart%2C+D">Dustin Enyeart</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.04578v1-abstract-short" style="display: inline;"> Koopman autoencoders are a prevalent architecture in operator learning. But, the loss functions and the form of the operator vary significantly in the literature. This paper presents a fair and systemic study of these options. Furthermore, it introduces novel loss terms. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.04578v1-abstract-full" style="display: none;"> Koopman autoencoders are a prevalent architecture in operator learning. But, the loss functions and the form of the operator vary significantly in the literature. This paper presents a fair and systemic study of these options. Furthermore, it introduces novel loss terms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04578v1-abstract-full').style.display = 'none'; document.getElementById('2412.04578v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
arXiv:2412.00578 (https://arxiv.org/abs/2412.00578) [pdf, other] cs.CV, cs.GR
Speedy-Splat: Fast 3D Gaussian Splatting with Sparse Pixels and Sparse Primitives
Authors: Alex Hanson, Allen Tu, Geng Lin, Vasu Singla, Matthias Zwicker, Tom Goldstein
Abstract: 3D Gaussian Splatting (3D-GS) is a recent 3D scene reconstruction technique that enables real-time rendering of novel views by modeling scenes as parametric point clouds of differentiable 3D Gaussians. However, its rendering speed and model size still present bottlenecks, especially in resource-constrained settings. In this paper, we identify and address two key inefficiencies in 3D-GS, achieving substantial improvements in rendering speed, model size, and training time. First, we optimize the rendering pipeline to precisely localize Gaussians in the scene, boosting rendering speed without altering visual fidelity. Second, we introduce a novel pruning technique and integrate it into the training pipeline, significantly reducing model size and training time while further raising rendering speed. Our Speedy-Splat approach combines these techniques to accelerate average rendering speed by a drastic 6.71× across scenes from the Mip-NeRF 360, Tanks & Temples, and Deep Blending datasets, with 10.6× fewer primitives than 3D-GS.
Submitted 30 November, 2024; originally announced December 2024.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00578v1-abstract-full').style.display = 'none'; document.getElementById('2412.00578v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00555">arXiv:2412.00555</a> <span> [<a href="https://arxiv.org/pdf/2412.00555">pdf</a>, <a href="https://arxiv.org/format/2412.00555">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Learning Dynamic Weight Adjustment for Spatial-Temporal Trajectory Planning in Crowd Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+M">Muqing Cao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xinhang Xu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yizhuo Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianping Li</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+T">Tongxing Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pengfei Wang</a>, <a href="/search/cs?searchtype=author&query=Hung%2C+T">Tzu-Yi Hung</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guosheng Lin</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+L">Lihua Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00555v1-abstract-short" style="display: inline;"> Robot navigation in dense human crowds poses a significant challenge due to the complexity of human behavior in dynamic and obstacle-rich environments. In this work, we propose a dynamic weight adjustment scheme using a neural network to predict the optimal weights of objectives in an optimization-based motion planner. We adopt a spatial-temporal trajectory planner and incorporate diverse objectiv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00555v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00555v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00555v1-abstract-full" style="display: none;"> Robot navigation in dense human crowds poses a significant challenge due to the complexity of human behavior in dynamic and obstacle-rich environments. In this work, we propose a dynamic weight adjustment scheme using a neural network to predict the optimal weights of objectives in an optimization-based motion planner. We adopt a spatial-temporal trajectory planner and incorporate diverse objectives to achieve a balance among safety, efficiency, and goal achievement in complex and dynamic environments. We design the network structure, observation encoding, and reward function to effectively train the policy network using reinforcement learning, allowing the robot to adapt its behavior in real time based on environmental and pedestrian information. 
Simulation results show improved safety compared to the fixed-weight planner and the state-of-the-art learning-based methods, and verify the ability of the learned policy to adaptively adjust the weights based on the observed situations. The approach's feasibility is demonstrated in a navigation task using an autonomous delivery robot across a crowded corridor over a 300 m distance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00555v1-abstract-full').style.display = 'none'; document.getElementById('2412.00555v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18217">arXiv:2411.18217</a> <span> [<a href="https://arxiv.org/pdf/2411.18217">pdf</a>, <a href="https://arxiv.org/format/2411.18217">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> How to Learn a New Language? An Efficient Solution for Self-Supervised Learning Models Unseen Languages Adaption in Low-Resource Scenario </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shih-Heng Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zih-Ching Chen</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jiatong Shi</a>, <a href="/search/cs?searchtype=author&query=Chuang%2C+M">Ming-To Chuang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guan-Ting Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kuan-Po Huang</a>, <a href="/search/cs?searchtype=author&query=Harwath%2C+D">David Harwath</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shang-Wen Li</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.18217v2-abstract-short" style="display: inline;"> The utilization of speech Self-Supervised Learning (SSL) models achieves impressive performance on Automatic Speech Recognition (ASR). However, in low-resource language ASR, they encounter the domain mismatch problem between pre-trained and low-resource languages. 
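<p class="is-size-7">A toy illustration of the idea in the abstract above: the planner's objective is a weighted sum of cost terms, and a learned policy supplies the weights at each replanning step. The objective names and cost forms below are hypothetical stand-ins, not the paper's planner.</p>
<pre><code class="language-python">
import numpy as np

def trajectory_cost(traj, weights):
    # weights would come from the policy network's output at this step
    w_safety, w_smooth, w_goal = weights
    safety = np.sum(1.0 / (1e-3 + traj["clearance"]))     # penalize small clearance to pedestrians
    smooth = np.sum(np.diff(traj["velocity"]) ** 2)       # penalize jerky motion
    goal = np.linalg.norm(traj["end"] - traj["goal"])     # distance from trajectory end to goal
    return w_safety * safety + w_smooth * smooth + w_goal * goal

traj = {"clearance": np.array([0.5, 0.4, 0.6]),
        "velocity": np.array([1.0, 1.2, 1.1]),
        "end": np.array([4.8, 0.1]), "goal": np.array([5.0, 0.0])}
print(trajectory_cost(traj, weights=(1.0, 0.1, 2.0)))
</code></pre>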
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18217">arXiv:2411.18217</a> <span> [<a href="https://arxiv.org/pdf/2411.18217">pdf</a>, <a href="https://arxiv.org/format/2411.18217">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> How to Learn a New Language? An Efficient Solution for Self-Supervised Learning Models Unseen Languages Adaption in Low-Resource Scenario </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shih-Heng Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zih-Ching Chen</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+J">Jiatong Shi</a>, <a href="/search/cs?searchtype=author&query=Chuang%2C+M">Ming-To Chuang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guan-Ting Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kuan-Po Huang</a>, <a href="/search/cs?searchtype=author&query=Harwath%2C+D">David Harwath</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shang-Wen Li</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Speech Self-Supervised Learning (SSL) models achieve impressive performance on Automatic Speech Recognition (ASR). However, in low-resource language ASR, they encounter a domain mismatch between the pre-trained and low-resource languages. Typical solutions, such as fine-tuning the SSL model, suffer from high computation costs, while using frozen SSL models as feature extractors yields poor performance. To handle these issues, we extend a conventional adapter-based efficient fine-tuning scheme: we add an extra intermediate adaptation to warm up the adapter and downstream model initialization. Remarkably, we update only 1-5% of the total model parameters to achieve the adaptation. Experimental results on the ML-SUPERB dataset show that our solution outperforms conventional efficient fine-tuning, achieving up to a 28% relative improvement in Character/Phoneme error rate when adapting to unseen languages. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
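<p class="is-size-7">A sketch of the kind of module adapter-based schemes like this insert: a standard residual bottleneck adapter, trained while the SSL backbone stays frozen so only a few percent of parameters update. Dimensions are illustrative, and the paper's intermediate-adaptation step is not shown.</p>
<pre><code class="language-python">
import torch
import torch.nn as nn

class Adapter(nn.Module):
    """Standard bottleneck adapter: down-project, nonlinearity, up-project, residual."""
    def __init__(self, dim=768, bottleneck=32):
        super().__init__()
        self.down = nn.Linear(dim, bottleneck)
        self.up = nn.Linear(bottleneck, dim)
        self.act = nn.GELU()

    def forward(self, h):
        return h + self.up(self.act(self.down(h)))  # residual keeps the frozen features intact

adapter = Adapter()
h = torch.randn(4, 100, 768)          # (batch, frames, hidden) from a frozen SSL layer
print(adapter(h).shape)               # torch.Size([4, 100, 768])
# Freezing the backbone: for p in ssl_model.parameters(): p.requires_grad = False
</code></pre>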
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11136">arXiv:2411.11136</a> <span> [<a href="https://arxiv.org/pdf/2411.11136">pdf</a>, <a href="https://arxiv.org/format/2411.11136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> </div> </div> <p class="title is-5 mathjax"> Approximation algorithms for non-sequential star packing problems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+M">Mengyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+A">An Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yong Chen</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+M">Mingyang Gong</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guohui Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> For a positive integer $k \ge 1$, a $k$-star ($k^+$-star, $k^-$-star, respectively) is a connected graph containing a degree-$\ell$ vertex and $\ell$ degree-$1$ vertices, where $\ell = k$ ($\ell \ge k$, $1 \le \ell \le k$, respectively). The $k^+$-star packing problem is to cover as many vertices of an input graph $G$ as possible using vertex-disjoint $k^+$-stars in $G$; and given $k > t \ge 1$, the $k^-/t$-star packing problem is to cover as many vertices of $G$ as possible using vertex-disjoint $k^-$-stars but no $t$-stars in $G$. Both problems are NP-hard for any fixed $k \ge 2$. We present a $(1 + \frac {k^2}{2k+1})$-approximation algorithm and a $\frac 32$-approximation algorithm for the $k^+$-star packing problem when $k \ge 3$ and $k = 2$, respectively, and a $(1 + \frac 1{t + 1 + 1/k})$-approximation algorithm for the $k^-/t$-star packing problem when $k > t \ge 2$. All three are local search algorithms, and each improves the best known approximation ratio for its problem. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for presentation in WALCOM 2025</span> </p> </li>
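<p class="is-size-7">The approximation ratios stated in the abstract, evaluated exactly for small $k$ and $t$:</p>
<pre><code class="language-python">
from fractions import Fraction

def kplus_ratio(k):
    # $(1 + k^2/(2k+1))$ for k >= 3, and 3/2 for k = 2, as stated in the abstract
    assert k >= 2
    return Fraction(3, 2) if k == 2 else 1 + Fraction(k * k, 2 * k + 1)

def kminus_t_ratio(k, t):
    # $(1 + 1/(t + 1 + 1/k))$ for k > t >= 2
    assert k > t >= 2
    return 1 + 1 / (t + 1 + Fraction(1, k))

print(kplus_ratio(3))        # 1 + 9/7 = 16/7
print(kminus_t_ratio(3, 2))  # 1 + 1/(3 + 1/3) = 13/10
</code></pre>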
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for presentation in WALCOM 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07239">arXiv:2411.07239</a> <span> [<a href="https://arxiv.org/pdf/2411.07239">pdf</a>, <a href="https://arxiv.org/format/2411.07239">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DeepONet as a Multi-Operator Extrapolation Model: Distributed Pretraining with Physics-Informed Fine-Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zecheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Moya%2C+C">Christian Moya</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Lu Lu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a>, <a href="/search/cs?searchtype=author&query=Schaeffer%2C+H">Hayden Schaeffer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07239v1-abstract-short" style="display: inline;"> We propose a novel fine-tuning method to achieve multi-operator learning through training a distributed neural operator with diverse function data and then zero-shot fine-tuning the neural network using physics-informed losses for downstream tasks. Operator learning effectively approximates solution operators for PDEs and various PDE-related problems, yet it often struggles to generalize to new ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07239v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07239v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07239v1-abstract-full" style="display: none;"> We propose a novel fine-tuning method to achieve multi-operator learning through training a distributed neural operator with diverse function data and then zero-shot fine-tuning the neural network using physics-informed losses for downstream tasks. Operator learning effectively approximates solution operators for PDEs and various PDE-related problems, yet it often struggles to generalize to new tasks. To address this, we investigate fine-tuning a pretrained model, while carefully selecting an initialization that enables rapid adaptation to new tasks with minimal data. Our approach combines distributed learning to integrate data from various operators in pre-training, while physics-informed methods enable zero-shot fine-tuning, minimizing the reliance on downstream data. We investigate standard fine-tuning and Low-Rank Adaptation fine-tuning, applying both to train complex nonlinear target operators that are difficult to learn only using random initialization. Through comprehensive numerical examples, we demonstrate the advantages of our approach, showcasing significant improvements in accuracy. Our findings provide a robust framework for advancing multi-operator learning and highlight the potential of transfer learning techniques in this domain. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07239v1-abstract-full').style.display = 'none'; document.getElementById('2411.07239v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06573">arXiv:2411.06573</a> <span> [<a href="https://arxiv.org/pdf/2411.06573">pdf</a>, <a href="https://arxiv.org/format/2411.06573">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> An Energy-Based Self-Adaptive Learning Rate for Stochastic Gradient Descent: Enhancing Unconstrained Optimization with VAV method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiahao Zhang</a>, <a href="/search/cs?searchtype=author&query=Moya%2C+C">Christian Moya</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06573v1-abstract-short" style="display: inline;"> Optimizing the learning rate remains a critical challenge in machine learning, essential for achieving model stability and efficient convergence. The Vector Auxiliary Variable (VAV) algorithm introduces a novel energy-based self-adjustable learning rate optimization method designed for unconstrained optimization problems. It incorporates an auxiliary variable $r$ to facilitate efficient energy app… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06573v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06573v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06573v1-abstract-full" style="display: none;"> Optimizing the learning rate remains a critical challenge in machine learning, essential for achieving model stability and efficient convergence. The Vector Auxiliary Variable (VAV) algorithm introduces a novel energy-based self-adjustable learning rate optimization method designed for unconstrained optimization problems. It incorporates an auxiliary variable $r$ to facilitate efficient energy approximation without backtracking while adhering to the unconditional energy dissipation law. Notably, VAV demonstrates superior stability with larger learning rates and achieves faster convergence in the early stage of the training process. Comparative analyses demonstrate that VAV outperforms Stochastic Gradient Descent (SGD) across various tasks. This paper also provides rigorous proof of the energy dissipation law and establishes the convergence of the algorithm under reasonable assumptions. 
Additionally, $r$ acts as an empirical lower bound of the training loss in practice, offering a novel scheduling approach that further enhances algorithm performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06573v1-abstract-full').style.display = 'none'; document.getElementById('2411.06573v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05544">arXiv:2411.05544</a> <span> [<a href="https://arxiv.org/pdf/2411.05544">pdf</a>, <a href="https://arxiv.org/format/2411.05544">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Lifelong Few-Shot Customization of Text-to-Image Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+N">Nan Song</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaofeng Yang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ze Yang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guosheng Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05544v1-abstract-short" style="display: inline;"> Lifelong few-shot customization for text-to-image diffusion aims to continually generalize existing models for new tasks with minimal data while preserving old knowledge. Current customization diffusion models excel in few-shot tasks but struggle with catastrophic forgetting problems in lifelong generations. In this study, we identify and categorize the catastrophic forgetting problems into two fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05544v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05544v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05544v1-abstract-full" style="display: none;"> Lifelong few-shot customization for text-to-image diffusion aims to continually generalize existing models for new tasks with minimal data while preserving old knowledge. Current customization diffusion models excel in few-shot tasks but struggle with catastrophic forgetting problems in lifelong generations. In this study, we identify and categorize the catastrophic forgetting problems into two folds: relevant concepts forgetting and previous concepts forgetting. To address these challenges, we first devise a data-free knowledge distillation strategy to tackle relevant concepts forgetting. Unlike existing methods that rely on additional real data or offline replay of original concept data, our approach enables on-the-fly knowledge distillation to retain the previous concepts while learning new ones, without accessing any previous data. 
Second, we develop an In-Context Generation (ICGen) paradigm that allows the diffusion model to be conditioned upon the input vision context, which facilitates the few-shot generation and mitigates the issue of previous concepts forgetting. Extensive experiments show that the proposed Lifelong Few-Shot Diffusion (LFS-Diffusion) method can produce high-quality and accurate images while maintaining previously learned knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05544v1-abstract-full').style.display = 'none'; document.getElementById('2411.05544v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01834">arXiv:2411.01834</a> <span> [<a href="https://arxiv.org/pdf/2411.01834">pdf</a>, <a href="https://arxiv.org/format/2411.01834">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Align-SLM: Textless Spoken Language Models with Reinforcement Learning from AI Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guan-Ting Lin</a>, <a href="/search/cs?searchtype=author&query=Shivakumar%2C+P+G">Prashanth Gurunath Shivakumar</a>, <a href="/search/cs?searchtype=author&query=Gourav%2C+A">Aditya Gourav</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yile Gu</a>, <a href="/search/cs?searchtype=author&query=Gandhe%2C+A">Ankur Gandhe</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a>, <a href="/search/cs?searchtype=author&query=Bulyko%2C+I">Ivan Bulyko</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01834v1-abstract-short" style="display: inline;"> While textless Spoken Language Models (SLMs) have shown potential in end-to-end speech-to-speech modeling, they still lag behind text-based Large Language Models (LLMs) in terms of semantic coherence and relevance. This work introduces the Align-SLM framework, which leverages preference optimization inspired by Reinforcement Learning with AI Feedback (RLAIF) to enhance the semantic understanding o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01834v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01834v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01834v1-abstract-full" style="display: none;"> While textless Spoken Language Models (SLMs) have shown potential in end-to-end speech-to-speech modeling, they still lag behind text-based Large Language Models (LLMs) in terms of semantic coherence and relevance. 
This work introduces the Align-SLM framework, which leverages preference optimization inspired by Reinforcement Learning with AI Feedback (RLAIF) to enhance the semantic understanding of SLMs. Our approach generates multiple speech continuations from a given prompt and uses semantic metrics to create preference data for Direct Preference Optimization (DPO). We evaluate the framework using ZeroSpeech 2021 benchmarks for lexical and syntactic modeling, the spoken version of the StoryCloze dataset for semantic coherence, and other speech generation metrics, including the GPT4-o score and human evaluation. Experimental results show that our method achieves state-of-the-art performance for SLMs on most benchmarks, highlighting the importance of preference optimization to improve the semantics of SLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01834v1-abstract-full').style.display = 'none'; document.getElementById('2411.01834v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01719">arXiv:2411.01719</a> <span> [<a href="https://arxiv.org/pdf/2411.01719">pdf</a>, <a href="https://arxiv.org/format/2411.01719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> LES-SINDy: Laplace-Enhanced Sparse Identification of Nonlinear Dynamical Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Haoyang Zheng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01719v1-abstract-short" style="display: inline;"> Sparse Identification of Nonlinear Dynamical Systems (SINDy) is a powerful tool for the data-driven discovery of governing equations. However, it encounters challenges when modeling complex dynamical systems involving high-order derivatives or discontinuities, particularly in the presence of noise. These limitations restrict its applicability across various fields in applied mathematics and physic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01719v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01719v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01719v1-abstract-full" style="display: none;"> Sparse Identification of Nonlinear Dynamical Systems (SINDy) is a powerful tool for the data-driven discovery of governing equations. 
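<p class="is-size-7">A sketch of the preference-data construction described in the Align-SLM abstract: sample several continuations, score them with a semantic metric, and keep the best and worst as the chosen/rejected pair for DPO. Here <code>generate</code> and <code>semantic_score</code> are placeholder stand-ins for a spoken-LM sampler and a semantic metric.</p>
<pre><code class="language-python">
import random

def build_preference_pair(prompt, generate, semantic_score, n=4):
    continuations = [generate(prompt) for _ in range(n)]          # multiple continuations per prompt
    ranked = sorted(continuations, key=semantic_score, reverse=True)
    return {"prompt": prompt, "chosen": ranked[0], "rejected": ranked[-1]}

pair = build_preference_pair(
    "once upon a time",
    generate=lambda p: p + " " + random.choice(["there was a king", "the end", "it rained"]),
    semantic_score=len,  # placeholder metric: longer continuation scores higher
)
print(pair)
</code></pre>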
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01719">arXiv:2411.01719</a> <span> [<a href="https://arxiv.org/pdf/2411.01719">pdf</a>, <a href="https://arxiv.org/format/2411.01719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> LES-SINDy: Laplace-Enhanced Sparse Identification of Nonlinear Dynamical Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Haoyang Zheng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Sparse Identification of Nonlinear Dynamical Systems (SINDy) is a powerful tool for the data-driven discovery of governing equations. However, it encounters challenges when modeling complex dynamical systems involving high-order derivatives or discontinuities, particularly in the presence of noise. These limitations restrict its applicability across various fields in applied mathematics and physics. To mitigate them, we propose Laplace-Enhanced Sparse Identification of Nonlinear Dynamical Systems (LES-SINDy). By transforming time-series measurements from the time domain to the Laplace domain using the Laplace transform and integration by parts, LES-SINDy enables more accurate approximations of derivatives and discontinuous terms. It also effectively handles unbounded growth functions and accumulated numerical errors in the Laplace domain, thereby overcoming challenges in the identification process. The model evaluation process selects the most accurate and parsimonious dynamical system from multiple candidates. Experimental results across diverse ordinary and partial differential equations show that LES-SINDy achieves superior robustness, accuracy, and parsimony compared to existing methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">47 pages, 5 figures</span> </p> </li>
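<p class="is-size-7">For context, vanilla SINDy fits sparse coefficients by sequentially thresholded least squares over a library of candidate terms in the time domain; a minimal version is below. The Laplace-domain formulation that LES-SINDy adds is not reproduced here.</p>
<pre><code class="language-python">
import numpy as np

def stlsq(Theta, dXdt, threshold=0.1, iters=10):
    """Sequentially thresholded least squares: dx/dt ~ Theta(x) @ Xi, Xi sparse."""
    Xi, _, _, _ = np.linalg.lstsq(Theta, dXdt, rcond=None)
    for _ in range(iters):
        small = np.abs(Xi) < threshold
        Xi[small] = 0.0
        for j in range(dXdt.shape[1]):        # refit the surviving terms per state dimension
            big = ~small[:, j]
            if big.any():
                Xi[big, j], _, _, _ = np.linalg.lstsq(Theta[:, big], dXdt[:, j], rcond=None)
    return Xi

# Toy system: x1 = sin t, x2 = cos t, so dx1/dt = x2 and dx2/dt = -x1.
t = np.linspace(0, 10, 500)
x = np.stack([np.sin(t), np.cos(t)], axis=1)
Theta = np.stack([x[:, 0], x[:, 1]], axis=1)   # candidate library: [x1, x2]
dXdt = np.gradient(x, t, axis=0)
print(stlsq(Theta, dXdt))                      # approx [[0, -1], [1, 0]]
</code></pre>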
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01432">arXiv:2411.01432</a> <span> [<a href="https://arxiv.org/pdf/2411.01432">pdf</a>, <a href="https://arxiv.org/format/2411.01432">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Meta-Exploiting Frequency Prior for Cross-Domain Few-Shot Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fei Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peng Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhenghua Chen</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+W">Wei Wei</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+C">Chen Ding</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guosheng Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yanning Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Meta-learning offers a promising avenue for few-shot learning (FSL), enabling models to glean a generalizable feature embedding through episodic training on synthetic FSL tasks in a source domain. Yet, in practical scenarios where the target task diverges from the source domain, meta-learning based methods are susceptible to over-fitting. To overcome this, we introduce a novel framework, Meta-Exploiting Frequency Prior for Cross-Domain Few-Shot Learning, which exploits the cross-domain transferable image prior that each image can be decomposed into complementary low-frequency content details and high-frequency robust structural characteristics. Motivated by this insight, we decompose each query image into its high-frequency and low-frequency components and incorporate them into the feature embedding network in parallel to enhance the final category prediction. More importantly, we introduce a feature reconstruction prior and a prediction consistency prior to separately encourage the consistency of the intermediate features and the final category predictions between the original query image and its decomposed frequency components. This collectively guides the network's meta-learning toward generalizable image feature embeddings, while introducing no extra computational cost in the inference phase. Our framework establishes new state-of-the-art results on multiple cross-domain few-shot learning benchmarks. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li>
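<p class="is-size-7">A sketch of the low/high-frequency image decomposition this abstract builds on, using a radial mask in Fourier space; the cutoff value is an illustrative choice, not the paper's.</p>
<pre><code class="language-python">
import numpy as np

def frequency_decompose(img, cutoff=0.1):
    """Split a 2D image into low-frequency content and high-frequency structure."""
    F = np.fft.fftshift(np.fft.fft2(img))                 # center the DC component
    h, w = img.shape
    yy, xx = np.mgrid[-h // 2:(h + 1) // 2, -w // 2:(w + 1) // 2]
    mask = np.sqrt((yy / h) ** 2 + (xx / w) ** 2) <= cutoff   # keep radii below the cutoff
    low = np.fft.ifft2(np.fft.ifftshift(F * mask)).real
    return low, img - low                                  # components sum back to the image

img = np.random.rand(64, 64)
low, high = frequency_decompose(img)
print(np.allclose(low + high, img))  # True
</code></pre>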
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.24162">arXiv:2410.24162</a> <span> [<a href="https://arxiv.org/pdf/2410.24162">pdf</a>, <a href="https://arxiv.org/format/2410.24162">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Conformalized Prediction of Post-Fault Voltage Trajectories Using Pre-trained and Finetuned Attention-Driven Neural Operators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mollaali%2C+A">Amirhossein Mollaali</a>, <a href="/search/cs?searchtype=author&query=Zufferey%2C+G">Gabriel Zufferey</a>, <a href="/search/cs?searchtype=author&query=Constante-Flores%2C+G">Gonzalo Constante-Flores</a>, <a href="/search/cs?searchtype=author&query=Moya%2C+C">Christian Moya</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Can Li</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+M">Meng Yue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper proposes a new data-driven methodology for predicting intervals of post-fault voltage trajectories in power systems. We begin by introducing the Quantile Attention-Fourier Deep Operator Network (QAF-DeepONet), designed to capture the complex dynamics of voltage trajectories and reliably estimate quantiles of the target trajectory without any distributional assumptions. The proposed operator regression model maps the observed portion of the voltage trajectory to its unobserved post-fault trajectory. Our methodology employs a pre-training and fine-tuning process to address the challenge of limited data availability. To ensure data privacy when learning the pre-trained model, we merge models via federated learning with data from neighboring buses, enabling the model to learn the underlying voltage dynamics from such buses without directly sharing their data. After pre-training, we fine-tune the model with data from the target bus, allowing it to adapt to unique dynamics and operating conditions. Finally, we integrate conformal prediction into the fine-tuned model to ensure coverage guarantees for the predicted intervals. We evaluate the proposed methodology on the New England 39-bus test system, considering detailed models of voltage and frequency controllers. Two metrics, Prediction Interval Coverage Probability (PICP) and Prediction Interval Normalized Average Width (PINAW), are used to numerically assess the model's performance in predicting intervals. The results show that the proposed approach offers practical and reliable uncertainty quantification for post-fault voltage trajectories. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li>
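<p class="is-size-7">The two interval metrics named above have standard definitions, sketched here: PICP is the fraction of targets falling inside the predicted interval, and PINAW is the average interval width normalized by the target range.</p>
<pre><code class="language-python">
import numpy as np

def picp(y, lo, hi):
    # fraction of ground-truth points covered by [lo, hi]
    return np.mean((y >= lo) & (y <= hi))

def pinaw(y, lo, hi):
    # mean interval width, normalized by the range of the targets
    return np.mean(hi - lo) / (y.max() - y.min())

y = np.array([1.0, 1.2, 0.9, 1.1])
lo, hi = y - 0.15, y + 0.1
print(picp(y, lo, hi), pinaw(y, lo, hi))  # 1.0  ~0.833
</code></pre>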
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22984">arXiv:2410.22984</a> <span> [<a href="https://arxiv.org/pdf/2410.22984">pdf</a>, <a href="https://arxiv.org/format/2410.22984">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Higher-order Cross-structural Embedding Model for Time Series Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guancen Lin</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+C">Cong Shen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+A">Aijing Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Time series analysis has gained significant attention due to its critical applications in diverse fields such as healthcare, finance, and sensor networks. The complexity and non-stationarity of time series make it challenging to capture the interaction patterns across different timestamps. Current approaches struggle to model higher-order interactions within time series, and focus on learning temporal or spatial dependencies separately, which limits performance in downstream tasks. To address these gaps, we propose the Higher-order Cross-structural Embedding Model for Time Series (High-TS), a novel framework that jointly models both temporal and spatial perspectives by combining a multiscale Transformer with Topological Deep Learning (TDL). High-TS also utilizes contrastive learning to integrate these two structures and generate robust, discriminative representations. Extensive experiments show that High-TS outperforms state-of-the-art methods in various time series tasks and demonstrate the importance of higher-order cross-structural information in improving model performance. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15420">arXiv:2410.15420</a> <span> [<a href="https://arxiv.org/pdf/2410.15420">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Where to Build Food Banks and Pantries: A Two-Level Machine Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ruan%2C+G">Gavin Ruan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Ziqi Guo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guang Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Over 44 million Americans currently suffer from food insecurity, of whom 13 million are children. Across the United States, thousands of food banks and pantries serve as vital sources of food and other forms of aid for food insecure families. By optimizing food bank and pantry locations, food would become more accessible to families who desperately require it. In this work, we introduce a novel two-level optimization framework, which utilizes the K-Medoids clustering algorithm in conjunction with the Open-Source Routing Machine engine, to optimize food bank and pantry locations based on real road distances to houses and house blocks. Our proposed framework can also factor in considerations such as median household income using a pseudo-weighted K-Medoids algorithm. Testing with California and Indiana household data, along with comparisons against real food bank and pantry locations, shows that our framework yields pantry locations superior to existing ones, saving households significant travel distance at only a marginal penalty in the first-level food-bank-to-pantry distance. Overall, the second-level benefits of this framework far outweigh the drawbacks, yielding a net benefit. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 4 figures</span> </p> </li>
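<p class="is-size-7">A sketch of the first level of such a pipeline: K-Medoids clustering over a precomputed road-distance matrix, which in practice would be queried from an OSRM server (random symmetric distances stand in below). This assumes the scikit-learn-extra package; the income-based pseudo-weighting is not shown.</p>
<pre><code class="language-python">
import numpy as np
from sklearn_extra.cluster import KMedoids

rng = np.random.default_rng(0)
n = 50                                   # households
D = rng.uniform(1, 20, size=(n, n))      # stand-in for OSRM road distances (km)
D = (D + D.T) / 2                        # make it symmetric
np.fill_diagonal(D, 0.0)

# Medoids are actual household locations, so each cluster center is a
# buildable candidate site, unlike K-Means centroids.
km = KMedoids(n_clusters=5, metric="precomputed", random_state=0).fit(D)
pantry_sites = km.medoid_indices_        # candidate pantry locations (household indices)
print(pantry_sites)
</code></pre>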
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12381">arXiv:2410.12381</a> <span> [<a href="https://arxiv.org/pdf/2410.12381">pdf</a>, <a href="https://arxiv.org/format/2410.12381">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HumanEval-V: Benchmarking High-Level Visual Reasoning with Complex Diagrams in Coding Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fengji Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+L">Linquan Wu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+H">Huiyu Bai</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guancheng Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiao Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiao Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Bei Chen</a>, <a href="/search/cs?searchtype=author&query=Keung%2C+J">Jacky Keung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Understanding and reasoning over diagrams is a fundamental aspect of human intelligence. While Large Multimodal Models (LMMs) have demonstrated impressive capabilities across various tasks, existing benchmarks lack comprehensive evaluation of their diagram interpretation and reasoning abilities, particularly in coding contexts. We present HumanEval-V, a rigorous benchmark of human-annotated coding tasks that spans six task types and evaluates diverse visual reasoning capabilities. Each task features carefully crafted diagrams paired with function signatures and test cases, employing novel code generation tasks to thoroughly assess models' diagram comprehension. Through extensive experiments with 22 LMMs, we find that even top-performing models achieve modest success rates, with Claude 3.5 Sonnet reaching only 36.8% pass@1, highlighting substantial room for improvement. Our analysis reveals that current LMMs struggle with spatial transformations, topological relationships, and dynamic patterns that humans find intuitive. These findings provide valuable insights for advancing LMMs' visual reasoning abilities. We have open-sourced our code and benchmark at https://github.com/HumanEval-V/HumanEval-V-Benchmark. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">homepage https://humaneval-v.github.io/</span> </p> </li>
We attribute this to differing implicit biases between traditional and batch-normalized neural networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06921v1-abstract-full').style.display = 'none'; document.getElementById('2410.06921v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19613">arXiv:2409.19613</a> <span> [<a href="https://arxiv.org/pdf/2409.19613">pdf</a>, <a href="https://arxiv.org/format/2409.19613">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Mamba for Few-Shot Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qianxiong Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuanyi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lanyun Zhu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guosheng Lin</a>, <a href="/search/cs?searchtype=author&query=Long%2C+C">Cheng Long</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Ziyue Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+R">Rui Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19613v1-abstract-short" style="display: inline;"> Many few-shot segmentation (FSS) methods use cross attention to fuse support foreground (FG) into query features, regardless of the quadratic complexity. A recent advance Mamba can also well capture intra-sequence dependencies, yet the complexity is only linear. Hence, we aim to devise a cross (attention-like) Mamba to capture inter-sequence dependencies for FSS. A simple idea is to scan on suppor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19613v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19613v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19613v1-abstract-full" style="display: none;"> Many few-shot segmentation (FSS) methods use cross attention to fuse support foreground (FG) into query features, regardless of the quadratic complexity. A recent advance Mamba can also well capture intra-sequence dependencies, yet the complexity is only linear. Hence, we aim to devise a cross (attention-like) Mamba to capture inter-sequence dependencies for FSS. A simple idea is to scan on support features to selectively compress them into the hidden state, which is then used as the initial hidden state to sequentially scan query features. 
arXiv:2409.19580 (abs: https://arxiv.org/abs/2409.19580; pdf: https://arxiv.org/pdf/2409.19580)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: High Quality Human Image Animation using Regional Supervision and Motion Blur Condition
Authors: Zhongcong Xu, Chaoyue Song, Guoxian Song, Jianfeng Zhang, Jun Hao Liew, Hongyi Xu, You Xie, Linjie Luo, Guosheng Lin, Jiashi Feng, Mike Zheng Shou
Abstract: Recent advances in video diffusion models have enabled realistic and controllable human image animation with temporal coherence. Although they generate reasonable results, existing methods often overlook the need for regional supervision in crucial areas such as the face and hands, and neglect explicit modeling of motion blur, leading to unrealistic, low-quality synthesis. To address these limitations, we first leverage regional supervision for detailed regions to enhance face and hand faithfulness. Second, we model motion blur explicitly to further improve appearance quality. Third, we explore novel training strategies for high-resolution human animation to improve overall fidelity. Experimental results demonstrate that our proposed method outperforms state-of-the-art approaches, improving upon the strongest baseline by more than 21.0% in reconstruction precision (L1) and 57.4% in perceptual quality (FVD) on the HumanDance dataset. Code and model will be made available.
Submitted 29 September, 2024; originally announced September 2024.
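One plausible reading of "regional supervision", sketched below as an assumption rather than the paper's actual loss: upweight the reconstruction error inside face and hand masks so those regions dominate the gradient. The mask source and the weight value are hypothetical.

```python
# Hedged sketch, not the paper's training code: region-weighted L1 loss that
# emphasises face/hand pixels during reconstruction training.
import torch

def regional_l1(pred, target, region_mask, region_weight=5.0):
    """L1 loss with extra weight where region_mask == 1 (face/hand pixels)."""
    weights = 1.0 + (region_weight - 1.0) * region_mask
    return (weights * (pred - target).abs()).mean()

pred = torch.rand(2, 3, 256, 256)        # generated frames (toy values)
target = torch.rand(2, 3, 256, 256)      # ground-truth frames
mask = torch.zeros(2, 1, 256, 256)
mask[:, :, 80:140, 100:160] = 1.0        # e.g. a detected face box
loss = regional_l1(pred, target, mask)
```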
arXiv:2409.14666 (abs: https://arxiv.org/abs/2409.14666; pdf: https://arxiv.org/pdf/2409.14666)
Subjects: cs.AI (Artificial Intelligence)
Title: Semi-supervised Learning For Robust Speech Evaluation
Authors: Huayun Zhang, Jeremy H. M. Wong, Geyu Lin, Nancy F. Chen
Abstract: Speech evaluation measures a learner's oral proficiency using automatic models. Corpora for training such models often pose sparsity challenges: scored data from teachers is limited, and the score distribution across proficiency levels is often imbalanced among student cohorts. Automatic scoring is thus not robust when faced with under-represented or out-of-distribution samples, which inevitably exist in real-world deployment scenarios. This paper proposes to address such challenges by exploiting semi-supervised pre-training and objective regularization to approximate subjective evaluation criteria. In particular, normalized mutual information is used to quantify the speech characteristics of the learner and the reference. An anchor model is trained using pseudo labels to predict the correctness of pronunciation. An interpolated loss function is proposed to minimize not only the prediction error with respect to ground-truth scores but also the divergence between the two probability distributions estimated by the speech evaluation model and the anchor model. Compared to other state-of-the-art methods on a public dataset, this approach not only achieves high performance when evaluating the entire test set as a whole, but also yields the most evenly distributed prediction error across distinct proficiency levels. Furthermore, empirical results show that model accuracy on out-of-distribution data also compares favorably with competitive baselines.
Submitted 22 September, 2024; originally announced September 2024.
Comments: 6 pages
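The interpolated objective is described precisely enough to sketch. Assuming the divergence term is a KL between the two models' output distributions and that `alpha` is a tunable interpolation weight (both assumptions; the paper may differ in details):

```python
# Minimal sketch of an interpolated loss: prediction error against ground-truth
# scores plus a divergence between the evaluation model's distribution and a
# pseudo-labelled anchor model's distribution.
import torch
import torch.nn.functional as F

def interpolated_loss(pred_scores, true_scores, model_logits, anchor_logits, alpha=0.5):
    mse = F.mse_loss(pred_scores, true_scores)            # ground-truth term
    kl = F.kl_div(F.log_softmax(model_logits, dim=-1),    # divergence to anchor
                  F.softmax(anchor_logits, dim=-1),
                  reduction="batchmean")
    return (1 - alpha) * mse + alpha * kl
```

The anchor term acts as a regularizer: on under-represented proficiency levels where ground-truth scores are scarce, the model is still pulled toward a distribution the anchor learned from pseudo labels.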
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10090">arXiv:2409.10090</a> <span> [<a href="https://arxiv.org/pdf/2409.10090">pdf</a>, <a href="https://arxiv.org/format/2409.10090">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MotionCom: Automatic and Motion-Aware Image Composition with LLM and Video Diffusion Prior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tao%2C+W">Weijing Tao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaofeng Yang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+M">Miaomiao Cui</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Guosheng Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10090v1-abstract-short" style="display: inline;"> This work presents MotionCom, a training-free motion-aware diffusion based image composition, enabling automatic and seamless integration of target objects into new scenes with dynamically coherent results without finetuning or optimization. Traditional approaches in this area suffer from two significant limitations: they require manual planning for object placement and often generate static compo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10090v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10090v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10090v1-abstract-full" style="display: none;"> This work presents MotionCom, a training-free motion-aware diffusion based image composition, enabling automatic and seamless integration of target objects into new scenes with dynamically coherent results without finetuning or optimization. Traditional approaches in this area suffer from two significant limitations: they require manual planning for object placement and often generate static compositions lacking motion realism. MotionCom addresses these issues by utilizing a Large Vision Language Model (LVLM) for intelligent planning, and a Video Diffusion prior for motion-infused image synthesis, streamlining the composition process. Our multi-modal Chain-of-Thought (CoT) prompting with LVLM automates the strategic placement planning of foreground objects, considering their potential motion and interaction within the scenes. Complementing this, we propose a novel method MotionPaint to distill motion-aware information from pretrained video diffusion models in the generation phase, ensuring that these objects are not only seamlessly integrated but also endowed with realistic motion. Extensive quantitative and qualitative results highlight MotionCom's superiority, showcasing its efficiency in streamlining the planning process and its capability to produce compositions that authentically depict motion and interaction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10090v1-abstract-full').style.display = 'none'; document.getElementById('2409.10090v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06635">arXiv:2409.06635</a> <span> [<a href="https://arxiv.org/pdf/2409.06635">pdf</a>, <a href="https://arxiv.org/ps/2409.06635">ps</a>, <a href="https://arxiv.org/format/2409.06635">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MoWE-Audio: Multitask AudioLLMs with Mixture of Weak Encoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+S">Shuo Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bin Wang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+X">Xunlong Zou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhuohan Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yingxu He</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+G">Geyu Lin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+N+F">Nancy F. Chen</a>, <a href="/search/cs?searchtype=author&query=Aw%2C+A+T">Ai Ti Aw</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06635v3-abstract-short" style="display: inline;"> The rapid advancements in large language models (LLMs) have significantly enhanced natural language processing capabilities, facilitating the development of AudioLLMs that process and understand speech and audio inputs alongside text. Existing AudioLLMs typically combine a pre-trained audio encoder with a pre-trained LLM, which are subsequently finetuned on specific audio tasks. However, the pre-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06635v3-abstract-full').style.display = 'inline'; document.getElementById('2409.06635v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06635v3-abstract-full" style="display: none;"> The rapid advancements in large language models (LLMs) have significantly enhanced natural language processing capabilities, facilitating the development of AudioLLMs that process and understand speech and audio inputs alongside text. Existing AudioLLMs typically combine a pre-trained audio encoder with a pre-trained LLM, which are subsequently finetuned on specific audio tasks. However, the pre-trained audio encoder has constrained capacity to capture features for new tasks and datasets. 
arXiv:2409.05910 (abs: https://arxiv.org/abs/2409.05910; pdf: https://arxiv.org/pdf/2409.05910)
Subjects: eess.AS (Audio and Speech Processing); cs.CL (Computation and Language); cs.LG (Machine Learning); cs.SD (Sound)
Title: Property Neurons in Self-Supervised Speech Transformers
Authors: Tzu-Quan Lin, Guan-Ting Lin, Hung-yi Lee, Hao Tang
Abstract: There have been many studies analyzing self-supervised speech Transformers, in particular with layer-wise analysis. It is, however, desirable to have an approach that can pinpoint exactly the subset of neurons responsible for a particular property of speech, making it amenable to model pruning and model editing. In this work, we identify a set of property neurons in the feedforward layers of Transformers to study how speech-related properties, such as phones, gender, and pitch, are stored. When we remove the neurons of a particular property (a simple form of model editing), the respective downstream performance degrades significantly, showing the importance of the property neurons. We apply this approach to pruning the feedforward layers in Transformers, where most of the model parameters are. We show that protecting property neurons during pruning is significantly more effective than norm-based pruning. The code for identifying property neurons is available at https://github.com/nervjack2/PropertyNeurons.
Submitted 20 September, 2024; v1 submitted 7 September, 2024; originally announced September 2024.
Comments: Accepted by SLT 2024
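The editing experiment is easy to sketch: zero out the chosen feedforward neurons and re-evaluate downstream performance. The `fc1`/`fc2` attribute names below are assumptions about the FFN layout, not the authors' API; their actual tooling is at the linked repository.

```python
# Sketch of neuron ablation as a simple form of model editing: silence selected
# hidden units of a Transformer FFN block (assumed Linear -> activation -> Linear).
import torch

@torch.no_grad()
def ablate_ffn_neurons(ffn, neuron_ids):
    ffn.fc1.weight[neuron_ids] = 0.0     # rows that produce those hidden units
    ffn.fc1.bias[neuron_ids] = 0.0
    ffn.fc2.weight[:, neuron_ids] = 0.0  # columns that read them back out

# usage sketch (evaluate, layer index, and neuron set are all hypothetical):
#   acc_before = evaluate(model, task)
#   ablate_ffn_neurons(model.layers[6].ffn, pitch_neuron_ids)
#   acc_after = evaluate(model, task)  # a large drop marks property-relevant neurons
```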
arXiv:2409.05380 (abs: https://arxiv.org/abs/2409.05380; pdf: https://arxiv.org/pdf/2409.05380)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: Prim2Room: Layout-Controllable Room Mesh Generation from Primitives
Authors: Chengzeng Feng, Jiacheng Wei, Cheng Chen, Yang Li, Pan Ji, Fayao Liu, Hongdong Li, Guosheng Lin
Abstract: We propose Prim2Room, a novel framework for controllable room mesh generation that leverages 2D layout conditions and 3D primitive retrieval to facilitate precise 3D layout specification. Diverging from existing methods that lack control and precision, our approach allows detailed customization of room-scale environments. To overcome the limitations of previous methods, we introduce an adaptive viewpoint selection algorithm that lets the system generate furniture texture and geometry from more favorable views than predefined camera trajectories. Additionally, we employ non-rigid depth registration to ensure alignment between generated objects and their corresponding primitives while allowing for shape variations to maintain diversity. Our method not only enhances the accuracy and aesthetic appeal of generated 3D scenes but also provides a user-friendly platform for detailed room design.
Submitted 9 September, 2024; originally announced September 2024.
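The abstract names an adaptive viewpoint selection algorithm without specifying its criterion, so the sketch below substitutes a simple stand-in score (unoccluded visibility of the target primitive) purely for illustration.

```python
# Speculative sketch of adaptive viewpoint selection: score candidate camera
# poses and pick the best, rather than following a fixed trajectory. The
# visibility-based score is a stand-in, not the paper's actual criterion.
import numpy as np

def select_viewpoint(candidates, visibility):
    """candidates: camera poses; visibility[i]: unoccluded fraction in [0, 1]."""
    return candidates[int(np.argmax(visibility))]

poses = ["front", "front-left", "top-down", "oblique"]
vis = np.array([0.40, 0.75, 0.20, 0.90])
print(select_viewpoint(poses, vis))  # -> "oblique"
```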
arXiv:2409.04593 (abs: https://arxiv.org/abs/2409.04593; pdf: https://arxiv.org/pdf/2409.04593)
Subjects: cs.CL (Computation and Language)
Title: Paper Copilot: A Self-Evolving and Efficient LLM System for Personalized Academic Assistance
Authors: Guanyu Lin, Tao Feng, Pengrui Han, Ge Liu, Jiaxuan You
Abstract: As scientific research proliferates, researchers face the daunting task of navigating and reading vast amounts of literature. Existing solutions, such as document QA, fail to provide personalized and up-to-date information efficiently. We present Paper Copilot, a self-evolving, efficient LLM system designed to assist researchers, based on thought retrieval, user profiles, and high-performance optimization. Specifically, Paper Copilot offers personalized research services and maintains a real-time updated database. Quantitative evaluation demonstrates that Paper Copilot saves 69.92% of users' time after efficient deployment. This paper details the design and implementation of Paper Copilot, highlighting its contributions to personalized academic support and its potential to streamline the research process.
Submitted 6 September, 2024; originally announced September 2024.
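"Thought retrieval" suggests an embedding store of past interactions queried by similarity. The sketch below is a generic retrieval loop under that assumption; the embedding function is a random stand-in, not Paper Copilot's.

```python
# Hedged sketch of a thought-retrieval store: past "thoughts" are embedded and
# the nearest ones are retrieved to condition the next answer.
import numpy as np

class ThoughtStore:
    def __init__(self, embed):
        self.embed, self.texts, self.vecs = embed, [], []

    def add(self, text):
        self.texts.append(text)
        self.vecs.append(self.embed(text))

    def retrieve(self, query, k=3):
        q = self.embed(query)
        sims = [float(q @ v / (np.linalg.norm(q) * np.linalg.norm(v) + 1e-9))
                for v in self.vecs]                      # cosine similarity
        top = np.argsort(sims)[::-1][:k]
        return [self.texts[i] for i in top]

# the lambda is a deterministic random embedding, a placeholder for a real model
store = ThoughtStore(embed=lambda t: np.random.default_rng(abs(hash(t)) % 2**32).normal(size=64))
store.add("User prefers diffusion-model papers.")
store.add("Summarised arXiv:2409.04593 yesterday.")
print(store.retrieve("what did we read about LLM assistants?", k=1))
```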