Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 3,520 results for author: <span class="mathjax">Wu, Y</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Wu%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Wu, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Wu%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Wu, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wu%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wu%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wu%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wu%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wu%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Wu%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13280">arXiv:2411.13280</a> <span> [<a href="https://arxiv.org/pdf/2411.13280">pdf</a>, <a href="https://arxiv.org/format/2411.13280">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Structure-Based Molecule Optimization via Gradient-Guided Bayesian Update </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+K">Keyue Qiu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yuxuan Song</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jie Yu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+H">Hongbo Ma</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Z">Ziyao Cao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhilong Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yushuai Wu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+M">Mingyue Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Wei-Ying Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13280v2-abstract-short" style="display: inline;"> Structure-based molecule optimization (SBMO) aims to optimize molecules with both continuous coordinates and discrete types against protein targets. 
A promising direction is to exert gradient guidance on generative models given its remarkable success in images, but it is challenging to guide discrete data and risks inconsistencies between modalities. To this end, we leverage a continuous and diffe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13280v2-abstract-full').style.display = 'inline'; document.getElementById('2411.13280v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13280v2-abstract-full" style="display: none;"> Structure-based molecule optimization (SBMO) aims to optimize molecules with both continuous coordinates and discrete types against protein targets. A promising direction is to exert gradient guidance on generative models given its remarkable success in images, but it is challenging to guide discrete data and risks inconsistencies between modalities. To this end, we leverage a continuous and differentiable space derived through Bayesian inference, presenting Molecule Joint Optimization (MolJO), the first gradient-based SBMO framework that facilitates joint guidance signals across different modalities while preserving SE(3)-equivariance. We introduce a novel backward correction strategy that optimizes within a sliding window of the past histories, allowing for a seamless trade-off between explore-and-exploit during optimization. Our proposed MolJO achieves state-of-the-art performance on CrossDocked2020 benchmark (Success Rate 51.3% , Vina Dock -9.05 and SA 0.78), more than 4x improvement in Success Rate compared to the gradient-based counterpart, and 2x "Me-Better" Ratio as much as 3D baselines. Furthermore, we extend MolJO to a wide range of optimization settings, including multi-objective optimization and challenging tasks in drug design such as R-group optimization and scaffold hopping, further underscoring its versatility and potential. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13280v2-abstract-full').style.display = 'none'; document.getElementById('2411.13280v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
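To make the guidance idea concrete, here is a toy sketch, not the authors' code: the quadratic "property" objective, the shrinkage stand-in for the learned posterior mean, and all step sizes are invented for illustration. It shows how the gradient of a differentiable objective can steer an iterative sampling loop.

```python
import numpy as np

def property_grad(x, target):
    # Gradient of a toy differentiable "property": -||x - target||^2.
    return -2.0 * (x - target)

def guided_denoise(x_init, target, steps=100, guidance_scale=0.3, seed=0):
    """Toy gradient-guided sampling loop.

    Each step pulls the sample toward a denoised estimate (a shrunken copy,
    standing in for a learned posterior mean E[x0 | xt]) and nudges it along
    the property gradient -- the essence of gradient guidance.
    """
    rng = np.random.default_rng(seed)
    x = x_init.copy()
    for t in range(steps, 0, -1):
        x_hat = 0.9 * x                                         # stand-in posterior mean
        x = x_hat + guidance_scale * property_grad(x_hat, target)
        x += 0.1 * (t / steps) * rng.standard_normal(x.shape)   # annealed noise
    return x

if __name__ == "__main__":
    target = np.array([1.0, -2.0, 0.5])
    x = guided_denoise(np.random.default_rng(1).standard_normal(3) * 5.0, target)
    print("final sample:", x)  # lands near the property optimum
```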
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13079">arXiv:2411.13079</a> <span> [<a href="https://arxiv.org/pdf/2411.13079">pdf</a>, <a href="https://arxiv.org/format/2411.13079">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Neural Internal Model Control: Learning a Robust Control Policy via Predictive Error Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+F">Feng Gao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chao Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yi Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13079v1-abstract-short" style="display: inline;"> Accurate motion control in the face of disturbances within complex environments remains a major challenge in robotics. Classical model-based approaches often struggle with nonlinearities and unstructured disturbances, while RL-based methods can be fragile when encountering unseen scenarios. In this paper, we propose a novel framework, Neural Internal Model Control, which integrates model-based con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13079v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13079v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13079v1-abstract-full" style="display: none;"> Accurate motion control in the face of disturbances within complex environments remains a major challenge in robotics. Classical model-based approaches often struggle with nonlinearities and unstructured disturbances, while RL-based methods can be fragile when encountering unseen scenarios. In this paper, we propose a novel framework, Neural Internal Model Control, which integrates model-based control with RL-based control to enhance robustness. Our framework streamlines the predictive model by applying Newton-Euler equations for rigid-body dynamics, eliminating the need to capture complex high-dimensional nonlinearities. This internal model combines model-free RL algorithms with predictive error feedback. Such a design enables a closed-loop control structure to enhance the robustness and generalizability of the control system. We demonstrate the effectiveness of our framework on both quadrotors and quadrupedal robots, achieving superior performance compared to state-of-the-art methods. Furthermore, real-world deployment on a quadrotor with rope-suspended payloads highlights the framework's robustness in sim-to-real transfer. Our code is released at https://github.com/thu-uav/NeuralIMC. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13079v1-abstract-full').style.display = 'none'; document.getElementById('2411.13079v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to RAL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13069">arXiv:2411.13069</a> <span> [<a href="https://arxiv.org/pdf/2411.13069">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Automatic marker-free registration based on similar tetrahedras for single-tree point clouds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jing Ren</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pei Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hanlong Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yuhan Wu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yuhang Gao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenxin Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mingtai Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lingyun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13069v1-abstract-short" style="display: inline;"> In recent years, terrestrial laser scanning technology has been widely used to collect tree point cloud data, aiding in measurements of diameter at breast height, biomass, and other forestry survey data. Since a single scan from terrestrial laser systems captures data from only one angle, multiple scans must be registered and fused to obtain complete tree point cloud data. This paper proposes a ma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13069v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13069v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13069v1-abstract-full" style="display: none;"> In recent years, terrestrial laser scanning technology has been widely used to collect tree point cloud data, aiding in measurements of diameter at breast height, biomass, and other forestry survey data. Since a single scan from terrestrial laser systems captures data from only one angle, multiple scans must be registered and fused to obtain complete tree point cloud data. This paper proposes a marker-free automatic registration method for single-tree point clouds based on similar tetrahedras. First, two point clouds from two scans of the same tree are used to generate tree skeletons, and key point sets are constructed from these skeletons. 
3. arXiv:2411.13069 — https://arxiv.org/abs/2411.13069
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Automatic marker-free registration based on similar tetrahedras for single-tree point clouds
Authors: Jing Ren, Pei Wang, Hanlong Li, Yuhan Wu, Yuhang Gao, Wenxin Chen, Mingtai Zhang, Lingyun Zhang
Abstract: In recent years, terrestrial laser scanning technology has been widely used to collect tree point cloud data, aiding in measurements of diameter at breast height, biomass, and other forestry survey data. Since a single scan from terrestrial laser systems captures data from only one angle, multiple scans must be registered and fused to obtain complete tree point cloud data. This paper proposes a marker-free automatic registration method for single-tree point clouds based on similar tetrahedra. First, two point clouds from two scans of the same tree are used to generate tree skeletons, and key point sets are constructed from these skeletons. Tetrahedra are then filtered and matched according to similarity principles, and the vertices of the matched tetrahedra are selected as matching point pairs, completing the coarse registration of the point clouds from the two scans. Subsequently, the ICP method is applied to the coarse-registered leaf point clouds to obtain fine registration parameters, completing the precise registration of the two tree point clouds. Experiments were conducted using terrestrial laser scanning data from eight trees, each from a different species and with varying shapes. The proposed method was evaluated using RMSE and Hausdorff distance, compared against the traditional ICP and NDT methods. The experimental results demonstrate that the proposed method significantly outperforms both ICP and NDT in registration accuracy, achieving speeds up to 593 times and 113 times faster than ICP and NDT, respectively. In summary, the proposed method shows good robustness in single-tree point cloud registration, with significant advantages in accuracy and speed over traditional ICP and NDT, indicating excellent application prospects in practical registration scenarios.
Submitted 20 November, 2024; originally announced November 2024.
Comments: remote sensing; terrestrial lidar; multi-scan cloud registration
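A minimal sketch of the two building blocks this abstract relies on: a similarity test for tetrahedra (the paper's exact criterion is not given here, so a relative edge-length tolerance is assumed) and the standard SVD/Kabsch estimate of a rigid transform from matched vertex pairs.

```python
import numpy as np
from itertools import combinations

def edge_lengths(tet):
    # Sorted lengths of the 6 edges of a tetrahedron (4x3 vertex array).
    return np.sort([np.linalg.norm(tet[i] - tet[j]) for i, j in combinations(range(4), 2)])

def similar(tet_a, tet_b, tol=0.05):
    # Two tetrahedra are "similar" here if corresponding sorted edge
    # lengths agree within a relative tolerance (an assumed criterion).
    ea, eb = edge_lengths(tet_a), edge_lengths(tet_b)
    return bool(np.all(np.abs(ea - eb) <= tol * np.maximum(ea, eb)))

def kabsch(P, Q):
    """Rigid transform (R, t) such that Q_i ~ R @ P_i + t for paired points."""
    cp, cq = P.mean(0), Q.mean(0)
    H = (P - cp).T @ (Q - cq)
    U, _, Vt = np.linalg.svd(H)
    d = np.sign(np.linalg.det(Vt.T @ U.T))   # guard against reflections
    R = Vt.T @ np.diag([1.0, 1.0, d]) @ U.T
    return R, cq - R @ cp

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    P = rng.standard_normal((4, 3))
    th = 0.7
    R_true = np.array([[np.cos(th), -np.sin(th), 0], [np.sin(th), np.cos(th), 0], [0, 0, 1]])
    Q = P @ R_true.T + np.array([1.0, 2.0, 3.0])
    print("similar:", similar(P, Q))                 # True: rigid motion preserves edges
    R, t = kabsch(P, Q)
    print("rotation error:", np.abs(R - R_true).max())
```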
4. arXiv:2411.13042 — https://arxiv.org/abs/2411.13042
Subjects: cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)
Attentive Contextual Attention for Cloud Removal
Authors: Wenli Huang, Ye Deng, Yang Wu, Jinjun Wang
Abstract: Cloud cover can significantly hinder the use of remote sensing images for Earth observation, prompting urgent advancements in cloud removal technology. Recently, deep learning strategies have shown strong potential in restoring cloud-obscured areas. These methods utilize convolution to extract intricate local features and attention mechanisms to gather long-range information, improving the overall comprehension of the scene. However, a common drawback of these approaches is that the resulting images often suffer from blurriness, artifacts, and inconsistencies. This is partly because attention mechanisms apply weights to all features based on generalized similarity scores, which can inadvertently introduce noise and irrelevant details from cloud-covered areas. To overcome this limitation and better capture relevant distant context, we introduce a novel approach named Attentive Contextual Attention (AC-Attention). This method enhances conventional attention mechanisms by dynamically learning data-driven attentive selection scores, enabling it to filter out noise and irrelevant features effectively. By integrating the AC-Attention module into the DSen2-CR cloud removal framework, we significantly improve the model's ability to capture essential distant information, leading to more effective cloud removal. Our extensive evaluation of various datasets shows that our method outperforms existing ones regarding image reconstruction quality. Additionally, we conducted ablation studies by integrating AC-Attention into multiple existing methods and widely used network architectures. These studies demonstrate the effectiveness and adaptability of AC-Attention and reveal its ability to focus on relevant features, thereby improving the overall performance of the networks. The code is available at https://github.com/huangwenwenlili/ACA-CRNet.
Submitted 20 November, 2024; originally announced November 2024.
Comments: 13 pages, 7 figures
5. arXiv:2411.12547 — https://arxiv.org/abs/2411.12547
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
S3TU-Net: Structured Convolution and Superpixel Transformer for Lung Nodule Segmentation
Authors: Yuke Wu, Xiang Liu, Yunyu Shi, Xinyi Chen, Zhenglei Wang, YuQing Xu, Shuo Hong Wang
Abstract: The irregular and challenging characteristics of lung adenocarcinoma nodules in computed tomography (CT) images complicate staging diagnosis, making accurate segmentation critical for clinicians to extract detailed lesion information. In this study, we propose a segmentation model, S3TU-Net, which integrates multi-dimensional spatial connectors and a superpixel-based visual transformer. S3TU-Net is built on a multi-view CNN-Transformer hybrid architecture, incorporating superpixel algorithms, structured weighting, and spatial shifting techniques to achieve superior segmentation performance. The model leverages structured convolution blocks (DWF-Conv/D2BR-Conv) to extract multi-scale local features while mitigating overfitting. To enhance multi-scale feature fusion, we introduce the S2-MLP Link, integrating spatial shifting and attention mechanisms at the skip connections. Additionally, the residual-based superpixel visual transformer (RM-SViT) effectively merges global and local features by employing sparse correlation learning and multi-branch attention to capture long-range dependencies, with residual connections enhancing stability and computational efficiency. Experimental results on the LIDC-IDRI dataset demonstrate that S3TU-Net achieves a DSC, precision, and IoU of 89.04%, 90.73%, and 90.70%, respectively. Compared to recent methods, S3TU-Net improves DSC by 4.52% and sensitivity by 3.16%, with other metrics showing an approximate 2% increase. In addition to comparison and ablation studies, we validated the generalization ability of our model on the EPDB private dataset, achieving a DSC of 86.40%.
Submitted 19 November, 2024; originally announced November 2024.
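The "spatial shifting" this abstract mentions comes from the S2-MLP family of designs; a minimal sketch of that shift operation (split channels into groups, shift each group one pixel in a different direction, so a following per-pixel MLP mixes neighbor information) might look like this:

```python
import numpy as np

def spatial_shift(x):
    """S2-MLP-style spatial shift on an (H, W, C) feature map.

    Each quarter of the channels is shifted one pixel in a different
    direction; a per-pixel MLP applied afterwards then sees information
    from the four neighbors without convolution or attention.
    """
    out = np.zeros_like(x)
    c = x.shape[2] // 4
    out[1:, :, :c] = x[:-1, :, :c]             # shift down
    out[:-1, :, c:2 * c] = x[1:, :, c:2 * c]   # shift up
    out[:, 1:, 2 * c:3 * c] = x[:, :-1, 2 * c:3 * c]  # shift right
    out[:, :-1, 3 * c:] = x[:, 1:, 3 * c:]     # shift left
    return out

if __name__ == "__main__":
    x = np.arange(24, dtype=float).reshape(2, 3, 4)  # tiny (H=2, W=3, C=4) map
    print(spatial_shift(x)[..., 0])
```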
6. arXiv:2411.11515 — https://arxiv.org/abs/2411.11515
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
Cascaded Diffusion Models for 2D and 3D Microscopy Image Synthesis to Enhance Cell Segmentation
Authors: Rüveyda Yilmaz, Kaan Keven, Yuli Wu, Johannes Stegmaier
Abstract: Automated cell segmentation in microscopy images is essential for biomedical research, yet conventional methods are labor-intensive and prone to error. While deep learning-based approaches have proven effective, they often require large annotated datasets, which are scarce due to the challenges of manual annotation. To overcome this, we propose a novel framework for synthesizing densely annotated 2D and 3D cell microscopy images using cascaded diffusion models. Our method synthesizes 2D and 3D cell masks from sparse 2D annotations using multi-level diffusion models and NeuS, a 3D surface reconstruction approach. Following that, a pretrained 2D Stable Diffusion model is finetuned to generate realistic cell textures and the final outputs are combined to form cell populations. We show that training a segmentation model with a combination of our synthetic data and real data improves cell segmentation performance by up to 9% across multiple datasets. Additionally, the FID scores indicate that the synthetic data closely resembles real data. The code for our proposed approach will be available at https://github.com/ruveydayilmaz0/cascaded_diffusion.
Submitted 19 November, 2024; v1 submitted 18 November, 2024; originally announced November 2024.
7. arXiv:2411.11479 — https://arxiv.org/abs/2411.11479
Subjects: cs.CL (Computation and Language)
Quantifying Preferences of Vision-Language Models via Value Decomposition in Social Media Contexts
Authors: Jingxuan Li, Yuning Yang, Shengqi Yang, Yizhou Zhao, Ying Nian Wu
Abstract: The rapid advancement of Vision-Language Models (VLMs) has expanded multimodal applications, yet evaluations often focus on basic tasks like object recognition, overlooking abstract aspects such as personalities and values. To address this gap, we introduce Value-Spectrum, a visual question-answering benchmark aimed at assessing VLMs based on Schwartz's value dimensions, which capture core values guiding people's beliefs and actions across cultures. We constructed a vectorized database of over 50,000 short videos sourced from TikTok, YouTube Shorts, and Instagram Reels, covering multiple months and a wide array of topics such as family, health, hobbies, society, and technology. We also developed a VLM agent pipeline to automate video browsing and analysis. Benchmarking representative VLMs on Value-Spectrum reveals significant differences in their responses to value-oriented content, with most models exhibiting a preference for hedonistic topics. Beyond identifying natural preferences, we explored the ability of VLM agents to adopt specific personas when explicitly prompted, revealing insights into the models' adaptability in role-playing scenarios. These findings highlight the potential of Value-Spectrum as a comprehensive evaluation set for tracking VLM advancements in value-based tasks and for developing more sophisticated role-playing AI agents.
Submitted 18 November, 2024; originally announced November 2024.
8. arXiv:2411.11262 — https://arxiv.org/abs/2411.11262
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Cross-Patient Pseudo Bags Generation and Curriculum Contrastive Learning for Imbalanced Multiclassification of Whole Slide Image
Authors: Yonghuang Wu, Xuan Xie, Xinyuan Niu, Chengqian Zhao, Jinhua Yu
Abstract: Pathology computing has dramatically improved pathologists' workflow and diagnostic decision-making processes. Although computer-aided diagnostic systems have shown considerable value in whole slide image (WSI) analysis, the problem of multi-classification under sample imbalance remains an intractable challenge. To address this, we propose learning fine-grained information by generating sub-bags with feature distributions similar to the original WSIs. Additionally, we utilize a pseudo-bag generation algorithm to further leverage the abundant and redundant information in WSIs, allowing efficient training in unbalanced-sample multi-classification tasks. Furthermore, we introduce an affinity-based sample selection and curriculum contrastive learning strategy to enhance the stability of model representation learning. Unlike previous approaches, our framework transitions from learning bag-level representations to understanding and exploiting the feature distribution of multi-instance bags. Our method demonstrates significant performance improvements on three datasets, including tumor classification and lymph node metastasis. On average, it achieves a 4.39-point improvement in F1 score compared to the second-best method across the three tasks, underscoring its superior performance.
Submitted 17 November, 2024; originally announced November 2024.
Comments: 9 pages, 4 figures
9. arXiv:2411.11006 — https://arxiv.org/abs/2411.11006
Subjects: cs.CR (Cryptography and Security); cs.AI (Artificial Intelligence)
BackdoorMBTI: A Backdoor Learning Multimodal Benchmark Tool Kit for Backdoor Defense Evaluation
Authors: Haiyang Yu, Tian Xie, Jiaping Gui, Pengyang Wang, Ping Yi, Yue Wu
Abstract: We introduce BackdoorMBTI, the first backdoor learning toolkit and benchmark designed for multimodal evaluation across three representative modalities from eleven commonly used datasets. BackdoorMBTI provides a systematic backdoor learning pipeline, encompassing data processing, data poisoning, backdoor training, and evaluation. The generated poison datasets and backdoor models enable detailed evaluation of backdoor defense methods. Given the diversity of modalities, BackdoorMBTI facilitates systematic evaluation across different data types. Furthermore, BackdoorMBTI offers a standardized approach to handling practical factors in backdoor learning, such as issues related to data quality and erroneous labels. We anticipate that BackdoorMBTI will expedite future research in backdoor defense methods within a multimodal context. Code is available at https://anonymous.4open.science/r/BackdoorMBTI-D6A1/README.md.
Submitted 17 November, 2024; originally announced November 2024.
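For readers unfamiliar with the data-poisoning step such a toolkit automates, here is a generic BadNets-style image poisoner, not BackdoorMBTI's actual API; the trigger size, poison rate, and array layout are assumptions.

```python
import numpy as np

def poison_dataset(images, labels, target_label, rate=0.1, seed=0):
    """Stamp a fixed trigger onto a fraction of images and flip their labels.

    Assumes images of shape (N, H, W) with values in [0, 1]. Returns the
    poisoned copies plus the poisoned indices (useful for evaluating defenses).
    """
    rng = np.random.default_rng(seed)
    images, labels = images.copy(), labels.copy()
    idx = rng.choice(len(images), int(rate * len(images)), replace=False)
    images[idx, -4:, -4:] = 1.0     # 4x4 white trigger patch in the corner
    labels[idx] = target_label      # attacker-chosen target class
    return images, labels, idx

if __name__ == "__main__":
    rng = np.random.default_rng(2)
    imgs, labels = np.zeros((200, 28, 28)), rng.integers(0, 10, 200)
    p_imgs, p_labels, idx = poison_dataset(imgs, labels, target_label=0)
    print(len(idx), p_imgs[idx[0], -4:, -4:].mean())  # 20 poisoned, trigger = 1.0
```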
10. arXiv:2411.10943 — https://arxiv.org/abs/2411.10943
Subjects: cs.MA (Multiagent Systems)
Generalist Virtual Agents: A Survey on Autonomous Agents Across Digital Platforms
Authors: Minghe Gao, Wendong Bu, Bingchen Miao, Yang Wu, Yunfei Li, Juncheng Li, Siliang Tang, Qi Wu, Yueting Zhuang, Meng Wang
Abstract: In this paper, we introduce the Generalist Virtual Agent (GVA), an autonomous entity engineered to function across diverse digital platforms and environments, assisting users by executing a variety of tasks. This survey delves into the evolution of GVAs, tracing their progress from early intelligent assistants to contemporary implementations that incorporate large-scale models. We explore both the philosophical underpinnings and practical foundations of GVAs, addressing their developmental challenges and the methodologies currently employed in their design and operation. By presenting a detailed taxonomy of GVA environments, tasks, and capabilities, this paper aims to bridge the theoretical and practical aspects of GVAs, concluding that those operating in environments closely mirroring the real world are more likely to demonstrate human-like intelligence. We discuss potential future directions for GVA research, highlighting the necessity for realistic evaluation metrics and the enhancement of long-sequence decision-making capabilities to advance the field toward more systematic or embodied applications. This work not only synthesizes the existing body of literature but also proposes frameworks for future investigations, contributing significantly to the ongoing development of intelligent systems.
Submitted 16 November, 2024; originally announced November 2024.
11. arXiv:2411.10596 — https://arxiv.org/abs/2411.10596
Subjects: q-bio.NC (Neurons and Cognition); cs.AI (Artificial Intelligence); cs.CV (Computer Vision and Pattern Recognition); stat.ML (Machine Learning)
A minimalistic representation model for head direction system
Authors: Minglu Zhao, Dehong Xu, Deqian Kong, Wen-Hao Zhang, Ying Nian Wu
Abstract: We present a minimalistic representation model for the head direction (HD) system, aiming to learn a high-dimensional representation of head direction that captures essential properties of HD cells. Our model is a representation of the rotation group $U(1)$, and we study both the fully connected version and the convolutional version. We demonstrate the emergence of Gaussian-like tuning profiles and a 2D circle geometry in both versions of the model. We also demonstrate that the learned model is capable of accurate path integration.
Submitted 15 November, 2024; originally announced November 2024.
Comments: Workshop on Symmetry and Geometry in Neural Representations (NeurReps) at NeurIPS 2024, Extended Abstract Track
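The $U(1)$ structure makes exact path integration easy to illustrate: embed the angle as stacked 2-D cosine/sine pairs and advance the embedding with block rotations. In this sketch the frequencies are hand-picked, whereas the paper learns the representation.

```python
import numpy as np

def embed(theta, freqs):
    """Embed a head direction as stacked 2-D rotation pairs -- a U(1) representation."""
    return np.concatenate([[np.cos(f * theta), np.sin(f * theta)] for f in freqs])

def rotate(v, dtheta, freqs):
    """Path integration: advance the embedding by dtheta via block rotations."""
    out = v.copy()
    for i, f in enumerate(freqs):
        c, s = np.cos(f * dtheta), np.sin(f * dtheta)
        x, y = v[2 * i], v[2 * i + 1]
        out[2 * i], out[2 * i + 1] = c * x - s * y, s * x + c * y
    return out

if __name__ == "__main__":
    freqs = [1, 2, 3]
    v = embed(0.3, freqs)
    for step in [0.1] * 5:                       # integrate five small turns
        v = rotate(v, step, freqs)
    print(np.allclose(v, embed(0.8, freqs)))     # True: exact path integration
```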
Although image-based virtual try-on has made considerable progress, emerging approaches still encounter challenges in producing high-fidelity and robust fitting images across diverse scenarios. These methods often struggle with issues such as texture-aware maintenance and size-aware fitting, which hinder their overall effectiveness. To address these limitations, we propose a novel garment perception enhancement technique, termed FitDiT, designed for high-fidelity virtual try-on using Diffusion Transformers (DiT), allocating more parameters and attention to high-resolution features. First, to further improve texture-aware maintenance, we introduce a garment texture extractor that incorporates garment priors evolution to fine-tune garment features, facilitating better capture of rich details such as stripes, patterns, and text. Additionally, we introduce frequency-domain learning by customizing a frequency distance loss to enhance high-frequency garment details. To tackle the size-aware fitting issue, we employ a dilated-relaxed mask strategy that adapts to the correct length of garments, preventing the generation of garments that fill the entire mask area during cross-category try-on. Equipped with the above design, FitDiT surpasses all baselines in both qualitative and quantitative evaluations. It excels in producing well-fitting garments with photorealistic and intricate details, while also achieving competitive inference times of 4.57 seconds for a single 1024x768 image after DiT structure slimming, outperforming existing methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project link: https://byjiang.com/FitDiT/</span> </p> </li>
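<p>The frequency-domain learning mentioned in the abstract can be illustrated with a rough sketch of a frequency distance loss (an assumption about the general form; the paper's exact loss, weighting, and names are not specified here):</p> <pre><code class="language-python">
import torch

def frequency_distance_loss(pred, target):
    """Compare images in the 2D Fourier domain so that high-frequency detail
    (stripes, patterns, text) contributes to the loss explicitly.
    pred, target: (B, C, H, W) tensors."""
    fp = torch.fft.fft2(pred, norm="ortho")
    ft = torch.fft.fft2(target, norm="ortho")
    # squared distance between complex spectra; amplitude and phase both count
    return (fp - ft).abs().pow(2).mean()

# hypothetical usage alongside a pixel-space term:
# total_loss = l1_loss + lambda_freq * frequency_distance_loss(x_hat, x)
</code></pre>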
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project link: https://byjiang.com/FitDiT/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10463">arXiv:2411.10463</a> <span> [<a href="https://arxiv.org/pdf/2411.10463">pdf</a>, <a href="https://arxiv.org/format/2411.10463">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unexploited Information Value in Human-AI Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Ziyang Guo</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yifan Wu</a>, <a href="/search/cs?searchtype=author&query=Hartline%2C+J">Jason Hartline</a>, <a href="/search/cs?searchtype=author&query=Hullman%2C+J">Jessica Hullman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10463v1-abstract-short" style="display: inline;"> Humans and AIs are often paired on decision tasks with the expectation of achieving complementary performance -- where the combination of human and AI outperforms either one alone. However, how to improve performance of a human-AI team is often not clear without knowing more about what particular information and strategies each agent employs. In this paper, we propose a model based in statisticall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10463v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10463v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10463v1-abstract-full" style="display: none;"> Humans and AIs are often paired on decision tasks with the expectation of achieving complementary performance -- where the combination of human and AI outperforms either one alone. However, how to improve performance of a human-AI team is often not clear without knowing more about what particular information and strategies each agent employs. In this paper, we propose a model based in statistically decision theory to analyze human-AI collaboration from the perspective of what information could be used to improve a human or AI decision. We demonstrate our model on a deepfake detection task to investigate seven video-level features by their unexploited value of information. We compare the human alone, AI alone and human-AI team and offer insights on how the AI assistance impacts people's usage of the information and what information that the AI exploits well might be useful for improving human decisions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10463v1-abstract-full').style.display = 'none'; document.getElementById('2411.10463v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10332">arXiv:2411.10332</a> <span> [<a href="https://arxiv.org/pdf/2411.10332">pdf</a>, <a href="https://arxiv.org/format/2411.10332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Number it: Temporal Grounding Videos like Flipping Manga </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yongliang Wu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xinting Hu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yuyang Sun</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yizhou Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+W">Wenbo Zhu</a>, <a href="/search/cs?searchtype=author&query=Rao%2C+F">Fengyun Rao</a>, <a href="/search/cs?searchtype=author&query=Schiele%2C+B">Bernt Schiele</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10332v1-abstract-short" style="display: inline;"> Video Large Language Models (Vid-LLMs) have made remarkable advancements in comprehending video content for QA dialogue. However, they struggle to extend this visual understanding to tasks requiring precise temporal localization, known as Video Temporal Grounding (VTG). To address this gap, we introduce Number-Prompt (NumPro), a novel method that empowers Vid-LLMs to bridge visual comprehension wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10332v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10332v1-abstract-full" style="display: none;"> Video Large Language Models (Vid-LLMs) have made remarkable advancements in comprehending video content for QA dialogue. However, they struggle to extend this visual understanding to tasks requiring precise temporal localization, known as Video Temporal Grounding (VTG). To address this gap, we introduce Number-Prompt (NumPro), a novel method that empowers Vid-LLMs to bridge visual comprehension with temporal grounding by adding unique numerical identifiers to each video frame. Treating a video as a sequence of numbered frame images, NumPro transforms VTG into an intuitive process: flipping through manga panels in sequence. This allows Vid-LLMs to "read" event timelines, accurately linking visual content with corresponding temporal information. Our experiments demonstrate that NumPro significantly boosts VTG performance of top-tier Vid-LLMs without additional computational cost. 
Furthermore, fine-tuning on a NumPro-enhanced dataset defines a new state-of-the-art for VTG, surpassing previous top-performing methods by up to 6.9\% in mIoU for moment retrieval and 8.5\% in mAP for highlight detection. The code will be available at https://github.com/yongliang-wu/NumPro. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 7 figures</span> </p> </li>
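<p>The frame-numbering mechanism described above is simple enough to sketch; a rough illustration with PIL (the font, color, and placement are our assumptions, not the paper's exact recipe):</p> <pre><code class="language-python">
from PIL import Image, ImageDraw

def number_frames(frames):
    """Overlay a unique numerical identifier on each video frame, so a
    Vid-LLM can 'read' timestamps like manga panel numbers.
    frames: list of PIL.Image objects; returns stamped copies."""
    out = []
    for i, frame in enumerate(frames):
        stamped = frame.copy()
        draw = ImageDraw.Draw(stamped)
        # stamp the frame index in a corner (position/size are choices, not NumPro's)
        draw.text((10, 10), str(i), fill="red")
        out.append(stamped)
    return out
</code></pre>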
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10069">arXiv:2411.10069</a> <span> [<a href="https://arxiv.org/pdf/2411.10069">pdf</a>, <a href="https://arxiv.org/format/2411.10069">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Layer Importance and Hallucination Analysis in Large Language Models via Enhanced Activation Variance-Sparsity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zichen Song</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Sitan Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yuxin Wu</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Z">Zhongfeng Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Evaluating the importance of different layers in large language models (LLMs) is crucial for optimizing model performance and interpretability. This paper first explores layer importance using the Activation Variance-Sparsity Score (AVSS), which combines normalized activation variance and sparsity to quantify each layer's contribution to overall model performance. By ranking layers based on AVSS and pruning the least impactful 25\%, our experiments on tasks such as question answering, language modeling, and sentiment classification show that over 90\% of the original performance is retained, highlighting potential redundancies in LLM architectures. Building on AVSS, we propose an enhanced version tailored to assess hallucination propensity across layers (EAVSS). This improved approach introduces Hallucination-Specific Activation Variance (HSAV) and Hallucination-Specific Sparsity (HSS) metrics, allowing precise identification of hallucination-prone layers. By incorporating contrastive learning on these layers, we effectively mitigate hallucination generation, contributing to more robust and efficient LLMs (the maximum performance improvement is 12\%). Our results on the NQ, SciQ, TriviaQA, TruthfulQA, and WikiQA datasets demonstrate the efficacy of this method, offering a comprehensive framework for both layer importance evaluation and hallucination mitigation in LLMs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 5 figures</span> </p> </li>
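<p>A hedged reading of the AVSS idea, scoring each layer by normalized activation variance combined with sparsity and pruning the lowest quartile, might look as follows (the exact normalization and the way the two terms are combined are our assumptions, not taken from the paper):</p> <pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(0)
# hypothetical stand-in for collected activations: one (samples, units) array per layer
layer_acts = [rng.standard_normal((256, 64)) * s for s in (0.1, 1.0, 2.0, 0.5)]

def avss(activations, eps=1e-8):
    """Per-layer Activation Variance-Sparsity Score: normalized activation
    variance combined with (1 - sparsity); the product is one plausible choice."""
    var = np.array([a.var() for a in activations])
    var = var / (var.sum() + eps)                      # normalized activation variance
    sparsity = np.array([(np.abs(a) < 0.05).mean() for a in activations])
    return var * (1.0 - sparsity)

scores = avss(layer_acts)
least_impactful = np.argsort(scores)[: len(scores) // 4]   # candidates to prune (25%)
print(scores, least_impactful)
</code></pre>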
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09686">arXiv:2411.09686</a> <span> [<a href="https://arxiv.org/pdf/2411.09686">pdf</a>, <a href="https://arxiv.org/format/2411.09686">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Conditional regression for the Nonlinear Single-Variable Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yantao Wu</a>, <a href="/search/cs?searchtype=author&query=Maggioni%2C+M">Mauro Maggioni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Several statistical models for regression of a function $F$ on $\mathbb{R}^d$ without the statistical and computational curse of dimensionality exist, for example by imposing and exploiting geometric assumptions on the distribution of the data (e.g. that its support is low-dimensional), or strong smoothness assumptions on $F$, or a special structure of $F$. Among the latter, compositional models, which assume $F=f\circ g$ with $g$ mapping to $\mathbb{R}^r$ with $r\ll d$, have been studied, and include classical single- and multi-index models and recent works on neural networks. While the case where $g$ is linear is rather well-understood, much less is known when $g$ is nonlinear, and in particular for which $g$'s the curse of dimensionality in estimating $F$, or both $f$ and $g$, may be circumvented. In this paper, we consider a model $F(X):=f(\Pi_\gamma X)$ where $\Pi_\gamma:\mathbb{R}^d\to[0,\mathrm{len}_\gamma]$ is the closest-point projection onto the parameter of a regular curve $\gamma: [0,\mathrm{len}_\gamma]\to\mathbb{R}^d$ and $f:[0,\mathrm{len}_\gamma]\to\mathbb{R}^1$. The input data $X$ is not low-dimensional, and may be far from $\gamma$, conditioned on $\Pi_\gamma(X)$ being well-defined. The distribution of the data, $\gamma$ and $f$ are unknown. This model is a natural nonlinear generalization of the single-index model, which corresponds to $\gamma$ being a line. We propose a nonparametric estimator, based on conditional regression, and show that under suitable assumptions, the strongest of which being that $f$ is coarsely monotone, it can achieve the $one$-$dimensional$ optimal min-max rate for non-parametric regression, up to the level of noise in the observations, and be constructed in time $\mathcal{O}(d^2n\log n)$. All the constants in the learning bounds, in the minimal number of samples required for our bounds to hold, and in the computational complexity are at most low-order polynomials in $d$. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">55 pages, 10 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 62G08 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09618">arXiv:2411.09618</a> <span> [<a href="https://arxiv.org/pdf/2411.09618">pdf</a>, <a href="https://arxiv.org/format/2411.09618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.59275/j.melba.2024-9c68">10.59275/j.melba.2024-9c68 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MICCAI-CDMRI 2023 QuantConn Challenge Findings on Achieving Robust Quantitative Connectivity through Harmonized Preprocessing of Diffusion MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Newlin%2C+N+R">Nancy R. Newlin</a>, <a href="/search/cs?searchtype=author&query=Schilling%2C+K">Kurt Schilling</a>, <a href="/search/cs?searchtype=author&query=Koudoro%2C+S">Serge Koudoro</a>, <a href="/search/cs?searchtype=author&query=Chandio%2C+B+Q">Bramsh Qamar Chandio</a>, <a href="/search/cs?searchtype=author&query=Kanakaraj%2C+P">Praitayini Kanakaraj</a>, <a href="/search/cs?searchtype=author&query=Moyer%2C+D">Daniel Moyer</a>, <a href="/search/cs?searchtype=author&query=Kelly%2C+C+E">Claire E. Kelly</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+S">Sila Genc</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J+Y">Joseph Yuan-Mou Yang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Ye Wu</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yifei He</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiawei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qingrun Zeng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fan Zhang</a>, <a href="/search/cs?searchtype=author&query=Adluru%2C+N">Nagesh Adluru</a>, <a href="/search/cs?searchtype=author&query=Nath%2C+V">Vishwesh Nath</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+S">Sudhir Pathak</a>, <a href="/search/cs?searchtype=author&query=Schneider%2C+W">Walter Schneider</a>, <a href="/search/cs?searchtype=author&query=Gade%2C+A">Anurag Gade</a>, <a href="/search/cs?searchtype=author&query=Rathi%2C+Y">Yogesh Rathi</a>, <a href="/search/cs?searchtype=author&query=Hendriks%2C+T">Tom Hendriks</a>, <a href="/search/cs?searchtype=author&query=Vilanova%2C+A">Anna Vilanova</a>, <a href="/search/cs?searchtype=author&query=Chamberland%2C+M">Maxime Chamberland</a>, <a href="/search/cs?searchtype=author&query=Pieciak%2C+T">Tomasz Pieciak</a> , et al. 
(11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> White matter alterations are increasingly implicated in neurological diseases and their progression. International-scale studies use diffusion-weighted magnetic resonance imaging (DW-MRI) to qualitatively identify changes in white matter microstructure and connectivity. Yet, quantitative analysis of DW-MRI data is hindered by inconsistencies stemming from varying acquisition protocols. There is a pressing need to harmonize the preprocessing of DW-MRI datasets to ensure the derivation of robust quantitative diffusion metrics across acquisitions. In the MICCAI-CDMRI 2023 QuantConn challenge, participants were provided raw data from the same individuals collected on the same scanner but with two different acquisitions and tasked with preprocessing the DW-MRI to minimize acquisition differences while retaining biological variation. Submissions are evaluated on the reproducibility and comparability of cross-acquisition bundle-wise microstructure measures, bundle shape features, and connectomics. The key innovations of the QuantConn challenge are that (1) we assess bundles and tractography in the context of harmonization for the first time, (2) we assess connectomics in the context of harmonization for the first time, and (3) we have 10x additional subjects over the prior harmonization challenge, MUSHAC, and 100x over SuperMUDI. We find that bundle surface area, fractional anisotropy, connectome assortativity, betweenness centrality, edge count, modularity, nodal strength, and participation coefficient measures are most biased by acquisition and that machine learning voxel-wise correction, RISH mapping, and NeSH methods effectively reduce these biases. In addition, microstructure measures AD, MD, RD, bundle length, connectome density, efficiency, and path length are least biased by these acquisition differences. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at the Journal of Machine Learning for Biomedical Imaging (MELBA) https://melba-journal.org/2024/019</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Machine.Learning.for.Biomedical.Imaging. 2 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09523">arXiv:2411.09523</a> <span> [<a href="https://arxiv.org/pdf/2411.09523">pdf</a>, <a href="https://arxiv.org/format/2411.09523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Navigating the Risks: A Survey of Security, Privacy, and Ethics Threats in LLM-Based Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yuyou Gan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yong Yang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Z">Zhe Ma</a>, <a href="/search/cs?searchtype=author&query=He%2C+P">Ping He</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+R">Rui Zeng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiming Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qingming Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chunyi Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Songze Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Ting Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yunjun Gao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yingcai Wu</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+S">Shouling Ji</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09523v1-abstract-short" style="display: inline;"> With the continuous development of large language models (LLMs), transformer-based models have made groundbreaking advances in numerous natural language processing (NLP) tasks, leading to the emergence of a series of agents that use LLMs as their control hub. While LLMs have achieved success in various tasks, they face numerous security and privacy threats, which become even more severe in the age… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09523v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09523v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09523v1-abstract-full" style="display: none;"> With the continuous development of large language models (LLMs), transformer-based models have made groundbreaking advances in numerous natural language processing (NLP) tasks, leading to the emergence of a series of agents that use LLMs as their control hub. While LLMs have achieved success in various tasks, they face numerous security and privacy threats, which become even more severe in the agent scenarios. To enhance the reliability of LLM-based applications, a range of research has emerged to assess and mitigate these risks from different perspectives. 
To help researchers gain a comprehensive understanding of various risks, this survey collects and analyzes the different threats faced by these agents. To address the challenges posed by previous taxonomies in handling cross-module and cross-stage threats, we propose a novel taxonomy framework based on the sources and impacts. Additionally, we identify six key features of LLM-based agents, based on which we summarize the current research progress and analyze their limitations. Subsequently, we select four representative agents as case studies to analyze the risks they may face in practical use. Finally, based on the aforementioned analyses, we propose future research directions from the perspectives of data, methodology, and policy, respectively. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09349">arXiv:2411.09349</a> <span> [<a href="https://arxiv.org/pdf/2411.09349">pdf</a>, <a href="https://arxiv.org/format/2411.09349">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ParaLBench: A Large-Scale Benchmark for Computational Paralinguistics over Acoustic Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zixing Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Weixiang Xu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zhongren Dong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kanglin Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yimeng Wu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jing Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Runming Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+D">Dong-Yan Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax">
Computational paralinguistics (ComParal) aims to develop algorithms and models to automatically detect, analyze, and interpret non-verbal information from speech communication, e.g., emotion, health state, age, and gender. Despite its rapid progress, it heavily depends on sophisticatedly designed models given specific paralinguistic tasks. Thus, the heterogeneity and diversity of ComParal models largely prevent their realistic implementation. Recently, with the advent of acoustic foundation models built on self-supervised learning, developing more generic models that can efficiently perceive a plethora of paralinguistic information has become an active topic in speech processing. However, the field lacks a unified evaluation framework for a fair and consistent performance comparison. To bridge this gap, we conduct a large-scale benchmark, namely ParaLBench, which concentrates on standardizing the evaluation process of diverse paralinguistic tasks, including critical aspects of affective computing such as emotion recognition and emotion dimensions prediction, over different acoustic foundation models. This benchmark contains ten datasets with thirteen distinct paralinguistic tasks, covering short-, medium- and long-term characteristics. Each task is carried out on 14 acoustic foundation models under a unified evaluation framework, which allows for an unbiased methodological comparison and offers a grounded reference for the ComParal community. Based on the insights gained from ParaLBench, we also point out potential research directions, i.e., the cross-corpus generalizability, to propel ComParal research in the future. The code associated with this study will be available to foster the transparency and replicability of this work for succeeding researchers. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08804">arXiv:2411.08804</a> <span> [<a href="https://arxiv.org/pdf/2411.08804">pdf</a>, <a href="https://arxiv.org/format/2411.08804">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Finance">q-fin.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Trading and Market Microstructure">q-fin.TR</span> </div> </div> <p class="title is-5 mathjax"> FinRobot: AI Agent for Equity Research and Valuation with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+T">Tianyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pinqiao Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yilin Wu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongyang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> As financial markets grow increasingly complex, there is a rising need for automated tools that can effectively assist human analysts in equity research, particularly within sell-side research. While Generative AI (GenAI) has attracted significant attention in this field, existing AI solutions often fall short due to their narrow focus on technical factors and limited capacity for discretionary judgment. These limitations hinder their ability to adapt to new data in real-time and accurately assess risks, which diminishes their practical value for investors. This paper presents FinRobot, the first AI agent framework specifically designed for equity research. FinRobot employs a multi-agent Chain of Thought (CoT) system, integrating both quantitative and qualitative analyses to emulate the comprehensive reasoning of a human analyst. The system is structured around three specialized agents: the Data-CoT Agent, which aggregates diverse data sources for robust financial integration; the Concept-CoT Agent, which mimics an analyst's reasoning to generate actionable insights; and the Thesis-CoT Agent, which synthesizes these insights into a coherent investment thesis and report.
FinRobot provides thorough company analysis supported by precise numerical data, industry-appropriate valuation metrics, and realistic risk assessments. Its dynamically updatable data pipeline ensures that research remains timely and relevant, adapting seamlessly to new financial information. Unlike existing automated research tools, such as CapitalCube and Wright Reports, FinRobot delivers insights comparable to those produced by major brokerage firms and fundamental research vendors. We open-source FinRobot at \url{https://github.com/AI4Finance-Foundation/FinRobot}. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 1st Workshop on LLMs and Generative AI for Finance, ICAIF 2024</span> </p> </li>
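<p>The three-agent Chain-of-Thought structure can be caricatured in a few lines (a sketch of the described architecture only; the <code>ask_llm</code> helper and all prompts are hypothetical and are not FinRobot's API):</p> <pre><code class="language-python">
def ask_llm(prompt: str) -> str:
    """Hypothetical LLM call; stands in for any chat-completion client."""
    raise NotImplementedError

def equity_report(ticker: str, raw_sources: list[str]) -> str:
    # Data-CoT agent: aggregate heterogeneous sources into one context
    data = ask_llm("Integrate these sources for " + ticker + ":\n" + "\n".join(raw_sources))
    # Concept-CoT agent: emulate an analyst's reasoning over the data
    insights = ask_llm("Reason like a sell-side analyst about:\n" + data)
    # Thesis-CoT agent: synthesize the insights into a thesis and report
    return ask_llm("Write an equity research report from:\n" + insights)
</code></pre> <p>The design point is the staged hand-off: each agent consumes the previous agent's output as its whole context, mirroring how the abstract separates data integration, analyst-style reasoning, and report synthesis.</p>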
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08164">arXiv:2411.08164</a> <span> [<a href="https://arxiv.org/pdf/2411.08164">pdf</a>, <a href="https://arxiv.org/format/2411.08164">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EAPCR: A Universal Feature Extractor for Scientific Data without Explicit Feature Relation Patterns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhuohang Yu</a>, <a href="/search/cs?searchtype=author&query=An%2C+L">Ling An</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yansong Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zeyu Dong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhangdi Liu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+L">Le Gao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhenyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chichun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Conventional methods, including Decision Tree (DT)-based methods, have been effective in scientific tasks, such as non-image medical diagnostics, system anomaly detection, and inorganic catalysis efficiency prediction. However, most deep-learning techniques have struggled to surpass or even match the level of success achieved by traditional machine-learning methods. The primary reason is that these applications involve multi-source, heterogeneous data where features lack explicit relationships. This contrasts with image data, where pixels exhibit spatial relationships; textual data, where words have sequential dependencies; and graph data, where nodes are connected through established associations. The absence of explicit Feature Relation Patterns (FRPs) presents a significant challenge for deep learning techniques in scientific applications that are not image-, text-, or graph-based. In this paper, we introduce EAPCR, a universal feature extractor designed for data without explicit FRPs. Tested across various scientific tasks, EAPCR consistently outperforms traditional methods and bridges the gap where deep learning models fall short. To further demonstrate its robustness, we synthesize a dataset without explicit FRPs. While Kolmogorov-Arnold Network (KAN) and feature extractors like Convolutional Neural Networks (CNNs), Graph Convolutional Networks (GCNs), and Transformers struggle, EAPCR excels, demonstrating its robustness and superior performance in scientific tasks without FRPs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07515">arXiv:2411.07515</a> <span> [<a href="https://arxiv.org/pdf/2411.07515">pdf</a>, <a href="https://arxiv.org/format/2411.07515">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Deep Learning Approach for Real-time Lane-based Arrival Curve Reconstruction at Intersection using License Plate Recognition Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+Y">Yang He</a>, <a href="/search/cs?searchtype=author&query=An%2C+C">Chengchuan An</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiawei Lu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yao-Jan Wu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zhenbo Lu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+J">Jingxin Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The acquisition of real-time and accurate traffic arrival information is of vital importance for proactive traffic control systems, especially in partially connected vehicle environments. License plate recognition (LPR) data that record both vehicle departures and identities are proven to be desirable in reconstructing lane-based arrival curves in previous works. Existing LPR data-based methods are predominantly designed for reconstructing historical arrival curves. For real-time reconstruction of multi-lane urban roads, it is pivotal to determine the lane choice of real-time link-based arrivals, which has not been exploited in previous studies. In this study, we propose a Bayesian deep learning approach for real-time lane-based arrival curve reconstruction, in which the lane choice patterns and uncertainties of link-based arrivals are both characterized. Specifically, the learning process is designed to effectively capture the relationship between partially observed link-based arrivals and lane-based arrivals, which can be physically interpreted as lane choice proportion. Moreover, the lane choice uncertainties are characterized using Bayesian parameter inference techniques, minimizing arrival curve reconstruction uncertainties, especially in low LPR data matching rate conditions. Real-world experiment results conducted in multiple matching rate scenarios demonstrate the superiority and necessity of lane choice modeling in reconstructing arrival curves.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07515v1-abstract-full').style.display = 'none'; document.getElementById('2411.07515v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by T-ITS</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07267">arXiv:2411.07267</a> <span> [<a href="https://arxiv.org/pdf/2411.07267">pdf</a>, <a href="https://arxiv.org/format/2411.07267">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Data Markets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiayao Zhang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Y">Yuran Bi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Mengye Cheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinfei Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+K">Kui Ren</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qiheng Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihang Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Fernandez%2C+R+C">Raul Castro Fernandez</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Haifeng Xu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+R">Ruoxi Jia</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+Y">Yongchan Kwon</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+J">Jian Pei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J+T">Jiachen T. Wang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+H">Haocheng Xia</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+L">Li Xiong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaohui Yu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+J">James Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07267v1-abstract-short" style="display: inline;"> Data is the new oil of the 21st century. The growing trend of trading data for greater welfare has led to the emergence of data markets. A data market is any mechanism whereby the exchange of data products including datasets and data derivatives takes place as a result of data buyers and data sellers being in contact with one another, either directly or through mediating agents. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07267">arXiv:2411.07267</a> <span> [<a href="https://arxiv.org/pdf/2411.07267">pdf</a>, <a href="https://arxiv.org/format/2411.07267">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Data Markets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiayao Zhang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Y">Yuran Bi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Mengye Cheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinfei Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+K">Kui Ren</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qiheng Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihang Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Fernandez%2C+R+C">Raul Castro Fernandez</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Haifeng Xu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+R">Ruoxi Jia</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+Y">Yongchan Kwon</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+J">Jian Pei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J+T">Jiachen T. Wang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+H">Haocheng Xia</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+L">Li Xiong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaohui Yu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+J">James Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Data is the new oil of the 21st century. The growing trend of trading data for greater welfare has led to the emergence of data markets. A data market is any mechanism whereby the exchange of data products including datasets and data derivatives takes place as a result of data buyers and data sellers being in contact with one another, either directly or through mediating agents. It serves as a coordinating mechanism by which several functions, including the pricing and the distribution of data as the most important ones, interact to make the value of data fully exploited and enhanced. In this article, we present a comprehensive survey of this important and emerging direction from the aspects of data search, data productization, data transaction, data pricing, revenue allocation as well as privacy, security, and trust issues. We also investigate the government policies and industry status of data markets across different countries and different domains. Finally, we identify the unresolved challenges and discuss possible future directions for the development of data markets. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07087">arXiv:2411.07087</a> <span> [<a href="https://arxiv.org/pdf/2411.07087">pdf</a>, <a href="https://arxiv.org/format/2411.07087">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> OCMDP: Observation-Constrained Markov Decision Process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Taiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianheng Liu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Bryan Lee</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhihao Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax">
In many practical applications, decision-making processes must balance the costs of acquiring information with the benefits it provides. Traditional control systems often assume full observability, an unrealistic assumption when observations are expensive. We tackle the challenge of simultaneously learning observation and control strategies in such cost-sensitive environments by introducing the Observation-Constrained Markov Decision Process (OCMDP), where the policy influences the observability of the true state. To manage the complexity arising from the combined observation and control actions, we develop an iterative, model-free deep reinforcement learning algorithm that separates the sensing and control components of the policy. This decomposition enables efficient learning in the expanded action space by focusing on when and what to observe, as well as determining optimal control actions, without requiring knowledge of the environment's dynamics. We validate our approach on a simulated diagnostic task and a realistic healthcare environment using HeartPole. In both scenarios, the experimental results demonstrate that our model achieves a substantial reduction in observation costs on average, significantly outperforming baseline methods by a notable margin in efficiency. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Full paper, 14 Pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06927">arXiv:2411.06927</a> <span> [<a href="https://arxiv.org/pdf/2411.06927">pdf</a>, <a href="https://arxiv.org/format/2411.06927">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-modal Iterative and Deep Fusion Frameworks for Enhanced Passive DOA Sensing via a Green Massive H2AD MIMO Receiver </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bai%2C+J">Jiatong Bai</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Minghao Chen</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+W">Wankai Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yifan Li</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+C">Cunhua Pan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+F">Feng Shu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06927v1-abstract-short" style="display: inline;"> Most existing DOA estimation methods assume ideal source incident angles with minimal noise. Moreover, directly using pre-estimated angles to calculate weighted coefficients can lead to performance loss. Thus, a green multi-modal (MM) fusion DOA framework is proposed to realize a more practical, low-cost and high time-efficiency DOA estimation for a H$^2$AD array. Firstly, two more efficient clust… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06927v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06927v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06927v1-abstract-full" style="display: none;"> Most existing DOA estimation methods assume ideal source incident angles with minimal noise. Moreover, directly using pre-estimated angles to calculate weighted coefficients can lead to performance loss. Thus, a green multi-modal (MM) fusion DOA framework is proposed to realize a more practical, low-cost and high time-efficiency DOA estimation for a H$^2$AD array. Firstly, two more efficient clustering methods, global maximum cos\_similarity clustering (GMaxCS) and global minimum distance clustering (GMinD), are presented to infer more precise true solutions from the candidate solution sets. Based on this, an iteration weighted fusion (IWF)-based method is introduced to iteratively update weighted fusion coefficients and the clustering center of the true solution classes by using the estimated values. Particularly, the coarse DOA calculated by fully digital (FD) subarray, serves as the initial cluster center. The above process yields two methods called MM-IWF-GMaxCS and MM-IWF-GMinD. To further provide a higher-accuracy DOA estimation, a fusion network (fusionNet) is proposed to aggregate the inferred two-part true angles and thus generates two effective approaches called MM-fusionNet-GMaxCS and MM-fusionNet-GMinD. 
The simulation outcomes show that the four proposed approaches can achieve ideal DOA performance and the CRLB. Meanwhile, the proposed MM-fusionNet-GMaxCS and MM-fusionNet-GMinD exhibit superior DOA performance compared to MM-IWF-GMaxCS and MM-IWF-GMinD, especially in the extremely low SNR range. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06927v1-abstract-full').style.display = 'none'; document.getElementById('2411.06927v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06767">arXiv:2411.06767</a> <span> [<a href="https://arxiv.org/pdf/2411.06767">pdf</a>, <a href="https://arxiv.org/format/2411.06767">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PDC & DM-SFT: A Road for LLM SQL Bug-Fix Enhancing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Duan%2C+Y">Yiwen Duan</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yonghong Yu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xiaoming Zhao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yichang Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenbo Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06767v1-abstract-short" style="display: inline;"> Code Large Language Models (Code LLMs), such as Code Llama and DeepSeek-Coder, have demonstrated exceptional performance in code generation tasks. However, most existing models focus on generating correct code but often struggle with bug repair. We introduce a suite of methods to enhance LLMs' SQL bug-fixing abilities. The methods consist of two main parts: a Progressi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06767v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06767v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06767v1-abstract-full" style="display: none;"> Code Large Language Models (Code LLMs), such as Code Llama and DeepSeek-Coder, have demonstrated exceptional performance in code generation tasks. However, most existing models focus on generating correct code but often struggle with bug repair. We introduce a suite of methods to enhance LLMs' SQL bug-fixing abilities. The methods consist of two main parts: a Progressive Dataset Construction (PDC) from scratch and Dynamic Mask Supervised Fine-tuning (DM-SFT). PDC proposes two data expansion methods, from the breadth-first and depth-first perspectives respectively. 
DM-SFT introduces an efficient bug-fixing supervised learning approach, which effectively reduces the total training steps and mitigates the "disorientation" in SQL code bug-fixing training. In our evaluation, the code LLMs trained with these two methods exceed all current best-performing models of much larger size. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06767v1-abstract-full').style.display = 'none'; document.getElementById('2411.06767v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">COLING-Industry 2025 accepted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06765">arXiv:2411.06765</a> <span> [<a href="https://arxiv.org/pdf/2411.06765">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Research on an intelligent fault diagnosis method for nuclear power plants based on ETCN-SSA combined algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fang%2C+J">Jiayan Fang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Siwei Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yichun Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06765v1-abstract-short" style="display: inline;"> Utilizing fault diagnosis methods is crucial for nuclear power professionals to achieve efficient and accurate fault diagnosis for nuclear power plants (NPPs). The performance of traditional methods is limited by their dependence on complex feature extraction and skilled expert knowledge, which can be time-consuming and subjective. This paper proposes a novel intelligent fault diagnosis method for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06765v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06765v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06765v1-abstract-full" style="display: none;"> Utilizing fault diagnosis methods is crucial for nuclear power professionals to achieve efficient and accurate fault diagnosis for nuclear power plants (NPPs). The performance of traditional methods is limited by their dependence on complex feature extraction and skilled expert knowledge, which can be time-consuming and subjective. This paper proposes a novel intelligent fault diagnosis method for NPPs that combines an enhanced temporal convolutional network (ETCN) with the sparrow search algorithm (SSA). 
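<p>As a rough illustration of the ETCN-SSA pairing just introduced, the sketch below stands in for the SSA step with a generic population search over ETCN hyperparameters: candidates are ranked by a fitness function and the fitter half is kept each generation. The search space, the resampling rule, and the omission of SSA's producer/scrounger update rules are all simplifying assumptions.</p> <pre><code>
import random

# invented hyperparameter space, for illustration only
SPACE = {"kernel_size": [2, 3, 5, 7], "channels": [16, 32, 64], "lr": [1e-4, 3e-4, 1e-3]}

def sample():
    return {k: random.choice(v) for k, v in SPACE.items()}

def ssa_like_search(fitness, pop_size=20, n_gen=10):
    """fitness(cfg) would briefly train the ETCN and return validation accuracy."""
    pop = [sample() for _ in range(pop_size)]
    for _ in range(n_gen):
        pop.sort(key=fitness, reverse=True)    # rank candidate configurations
        keep = pop_size // 2
        pop = pop[:keep] + [sample() for _ in range(pop_size - keep)]
    return max(pop, key=fitness)               # best configuration found
</code></pre>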
ETCN combines a temporal convolutional network (TCN), a self-attention (SA) mechanism, and residual blocks to enhance performance. ETCN excels at extracting local features and capturing time series information, while SSA adaptively optimizes its hyperparameters. The proposed method's performance is experimentally verified on a CPR1000 simulation dataset. Compared to other advanced intelligent fault diagnosis methods, the proposed one demonstrates superior performance across all evaluation metrics. This makes it a promising tool for NPP intelligent fault diagnosis, ultimately enhancing operational reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06765v1-abstract-full').style.display = 'none'; document.getElementById('2411.06765v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06655">arXiv:2411.06655</a> <span> [<a href="https://arxiv.org/pdf/2411.06655">pdf</a>, <a href="https://arxiv.org/format/2411.06655">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Explore the Reasoning Capability of LLMs in the Chess Testbed </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shu Wang</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+L">Lei Ji</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Renxi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wenxiao Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haokun Liu</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+Y">Yifan Hou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y+N">Ying Nian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06655v1-abstract-short" style="display: inline;"> Reasoning is a central capability of human intelligence. In recent years, with the advent of large-scale datasets, pretrained large language models have emerged with new capabilities, including reasoning. However, these models still struggle with long-term, complex reasoning tasks, such as playing chess. Based on the observation that expert chess players employ a dual approach combining long-term… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06655v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06655v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06655v1-abstract-full" style="display: none;"> Reasoning is a central capability of human intelligence. In recent years, with the advent of large-scale datasets, pretrained large language models have emerged with new capabilities, including reasoning. 
However, these models still struggle with long-term, complex reasoning tasks, such as playing chess. Based on the observation that expert chess players employ a dual approach combining long-term strategic play with short-term tactical play along with language explanation, we propose improving the reasoning capability of large language models in chess by integrating annotated strategies and tactics. Specifically, we collect a dataset named MATE, which consists of 1 million chess positions with candidate moves annotated by chess experts for strategy and tactics. We finetune the LLaMA-3-8B model and compare it against state-of-the-art commercial language models in the task of selecting better chess moves. Our experiments show that our models perform better than GPT, Claude, and Gemini models. We find that language explanations can enhance the reasoning capability of large language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06655v1-abstract-full').style.display = 'none'; document.getElementById('2411.06655v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to NAACL2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06339">arXiv:2411.06339</a> <span> [<a href="https://arxiv.org/pdf/2411.06339">pdf</a>, <a href="https://arxiv.org/ps/2411.06339">ps</a>, <a href="https://arxiv.org/format/2411.06339">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic Shaped Multilevel Polar Coding for Wiretap Channel </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+P">Peihong Yuan</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+C">Chengshan Xiao</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenjun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06339v1-abstract-short" style="display: inline;"> The wiretap channel serves as the fundamental model of physical layer security techniques, where the secrecy capacity of the Gaussian wiretap channel is proven to be achieved by Gaussian input. However, there remains a gap between the Gaussian secrecy capacity and the secrecy rate with conventional uniformly distributed discrete constellation input, e.g. 
amplitude shift keying (ASK) and quadratur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06339v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06339v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06339v1-abstract-full" style="display: none;"> The wiretap channel serves as the fundamental model of physical layer security techniques, where the secrecy capacity of the Gaussian wiretap channel is proven to be achieved by Gaussian input. However, there remains a gap between the Gaussian secrecy capacity and the secrecy rate with conventional uniformly distributed discrete constellation input, e.g. amplitude shift keying (ASK) and quadrature amplitude modulation (QAM). In this paper, we propose a probabilistic shaped multilevel polar coding scheme to bridge the gap. Specifically, the input distribution optimization problem for maximizing the secrecy rate with ASK/QAM input is solved. Numerical results show that the resulting sub-optimal solution can still approach the Gaussian secrecy capacity. Then, we investigate the polarization of multilevel polar codes for the asymmetric discrete memoryless wiretap channel, and thus propose a multilevel polar coding scheme integrated with probabilistic shaping. It is proved that the scheme can achieve the secrecy capacity of the Gaussian wiretap channel with discrete constellation input, and satisfies the reliability condition and the weak security condition. A security-oriented polar code construction method that natively satisfies the leakage-based security condition is also investigated. Simulation results show that the proposed scheme achieves more efficient and secure transmission than the uniform constellation input case over both the Gaussian wiretap channel and the Rayleigh fading wiretap channel. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06339v1-abstract-full').style.display = 'none'; document.getElementById('2411.06339v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
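<p>As a rough illustration of the input-distribution optimization described in this abstract, the sketch below sweeps a Maxwell-Boltzmann-shaped distribution over an 8-ASK constellation and keeps the shaping parameter that maximizes an estimated secrecy rate I(X;Y_b) - I(X;Y_e) for a degraded eavesdropper. The SNR values, the Maxwell-Boltzmann family, and the grid integration are assumptions for illustration; the paper's exact optimization is not reproduced.</p> <pre><code>
import numpy as np

def mi_awgn(points, probs, snr_db):
    """Mutual information (bits) of a discrete input over a real AWGN channel."""
    power = np.sum(probs * points**2)
    sigma = np.sqrt(power / 10**(snr_db / 10.0))
    y = np.linspace(points.min() - 6*sigma, points.max() + 6*sigma, 4000)
    pyx = np.exp(-(y[None, :] - points[:, None])**2 / (2*sigma**2)) / (np.sqrt(2*np.pi)*sigma)
    py = probs @ pyx                                      # output density
    integrand = probs[:, None] * pyx * np.log2(np.maximum(pyx, 1e-300) / np.maximum(py, 1e-300))
    return float(integrand.sum(axis=0).sum() * (y[1] - y[0]))   # rectangle-rule integral

ask = np.arange(-7.0, 8.0, 2.0)                           # 8-ASK constellation
best = max(
    (mi_awgn(ask, p, 10.0) - mi_awgn(ask, p, 5.0), nu)    # Bob 10 dB, Eve 5 dB (assumed)
    for nu in np.linspace(0.0, 0.3, 31)
    for p in [np.exp(-nu * ask**2) / np.sum(np.exp(-nu * ask**2))]
)
print("estimated secrecy rate %.3f bits at nu = %.2f" % best)
</code></pre>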
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE JSAC NGAT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06248">arXiv:2411.06248</a> <span> [<a href="https://arxiv.org/pdf/2411.06248">pdf</a>, <a href="https://arxiv.org/format/2411.06248">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Robust Detection of LLM-Generated Text: A Comparative Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yongye Su</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yuqing Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06248v1-abstract-short" style="display: inline;"> The ability of large language models to generate complex texts allows them to be widely integrated into many aspects of life, and their output can quickly fill all network resources. As the impact of LLMs grows, it becomes increasingly important to develop powerful detectors for the generated text. This detector is essential to prevent the potential misuse of these technologies and to protect area… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06248v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06248v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06248v1-abstract-full" style="display: none;"> The ability of large language models to generate complex texts allows them to be widely integrated into many aspects of life, and their output can quickly fill all network resources. As the impact of LLMs grows, it becomes increasingly important to develop powerful detectors for the generated text. This detector is essential to prevent the potential misuse of these technologies and to protect areas such as social media from the negative effects of false content generated by LLMS. The main goal of LLM-generated text detection is to determine whether text is generated by an LLM, which is a basic binary classification task. In our work, we mainly use three different classification methods based on open source datasets: traditional machine learning techniques such as logistic regression, k-means clustering, Gaussian Naive Bayes, support vector machines, and methods based on converters such as BERT, and finally algorithms that use LLMs to detect LLM-generated text. We focus on model generalization, potential adversarial attacks, and accuracy of model evaluation. Finally, the possible research direction in the future is proposed, and the current experimental results are summarized. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06248v1-abstract-full').style.display = 'none'; document.getElementById('2411.06248v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05451">arXiv:2411.05451</a> <span> [<a href="https://arxiv.org/pdf/2411.05451">pdf</a>, <a href="https://arxiv.org/format/2411.05451">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> WorkflowLLM: Enhancing Workflow Orchestration Capability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+S">Shengda Fan</a>, <a href="/search/cs?searchtype=author&query=Cong%2C+X">Xin Cong</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yuepeng Fu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuyan Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuanwei Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yesai Wu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yankai Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05451v1-abstract-short" style="display: inline;"> Recent advancements in large language models (LLMs) have driven a revolutionary paradigm shift in process automation from Robotic Process Automation to Agentic Process Automation by automating the workflow orchestration procedure based on LLMs. However, existing LLMs (even the advanced OpenAI GPT-4o) are confined to achieving satisfactory capability in workflow orchestration. To address this limit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05451v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05451v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05451v1-abstract-full" style="display: none;"> Recent advancements in large language models (LLMs) have driven a revolutionary paradigm shift in process automation from Robotic Process Automation to Agentic Process Automation by automating the workflow orchestration procedure based on LLMs. However, existing LLMs (even the advanced OpenAI GPT-4o) are confined to achieving satisfactory capability in workflow orchestration. 
To address this limitation, we present WorkflowLLM, a data-centric framework elaborately designed to enhance the capability of LLMs in workflow orchestration. It first constructs a large-scale fine-tuning dataset WorkflowBench with 106,763 samples, covering 1,503 APIs from 83 applications across 28 categories. Specifically, the construction process can be divided into three phases: (1) Data Collection: we collect real-world workflow data from Apple Shortcuts and RoutineHub, transcribing them into Python-style code. We further equip them with generated hierarchical thought via ChatGPT. (2) Query Expansion: we prompt ChatGPT to generate more task queries to enrich the diversity and complexity of workflows. (3) Workflow Generation: we leverage an annotator model trained on collected data to generate workflows for synthesized queries. Finally, we merge the synthetic samples that pass quality confirmation with the collected samples to obtain the WorkflowBench. Based on WorkflowBench, we fine-tune Llama-3.1-8B to obtain WorkflowLlama. Our experiments show that WorkflowLlama demonstrates a strong capacity to orchestrate complex workflows, while also achieving notable generalization performance on previously unseen APIs. Additionally, WorkflowBench exhibits robust zero-shot generalization capabilities on an out-of-distribution task planning dataset, T-Eval. Our data and code are available at https://github.com/OpenBMB/WorkflowLLM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05451v1-abstract-full').style.display = 'none'; document.getElementById('2411.05451v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05010">arXiv:2411.05010</a> <span> [<a href="https://arxiv.org/pdf/2411.05010">pdf</a>, <a href="https://arxiv.org/format/2411.05010">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scattered Forest Search: Smarter Code Space Exploration with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Light%2C+J">Jonathan Light</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yue Wu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yiyou Sun</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wenchao Yu</a>, <a href="/search/cs?searchtype=author&query=liu%2C+Y">Yanchi liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xujiang Zhao</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Ziniu Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wei Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05010v1-abstract-short" style="display: inline;"> We propose a novel approach to scaling LLM inference for code generation. We frame code generation as a black box optimization problem within the code space, and employ optimization-inspired techniques to enhance exploration. Specifically, we introduce Scattered Forest Search to enhance solution diversity while searching for solutions. Our theoretical analysis illustrates how these methods avoid l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05010v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05010v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05010v1-abstract-full" style="display: none;"> We propose a novel approach to scaling LLM inference for code generation. We frame code generation as a black box optimization problem within the code space, and employ optimization-inspired techniques to enhance exploration. Specifically, we introduce Scattered Forest Search to enhance solution diversity while searching for solutions. Our theoretical analysis illustrates how these methods avoid local optima during optimization. Extensive experiments on HumanEval, MBPP, APPS, CodeContests, and Leetcode reveal significant performance improvements. For instance, our method achieves a pass@1 rate of 67.1% on HumanEval+ and 87.2% on HumanEval with GPT-3.5, marking improvements of 8.6% and 4.3% over the state-of-the-art, while also halving the iterations needed to find the correct solution. Furthermore, our method scales more efficiently than existing search techniques, including tree search, line search, and repeated sampling. 
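<p>The black-box-optimization framing above admits a compact sketch: sample candidate programs from diverse prompt "seeds", score them with unit-test feedback, and refine from the best so far. Here <code>llm_sample</code> is a hypothetical stand-in for an LLM call, and the paper's scattering/foresting heuristics are reduced to seed diversity for illustration.</p> <pre><code>
import random

def sfs_like_search(llm_sample, tests, seeds, budget=50):
    """llm_sample(seed, best_so_far) -> candidate program (hypothetical LLM call)."""
    best, best_score = None, -1
    for _ in range(budget):
        seed = random.choice(seeds)               # scatter: vary the prompt direction
        program = llm_sample(seed, best)          # optionally refine the current best
        score = sum(t(program) for t in tests)    # unit-test feedback
        if score > best_score:
            best, best_score = program, score
        if best_score == len(tests):              # all tests pass: stop early
            return best
    return best
</code></pre>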
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05010v1-abstract-full').style.display = 'none'; document.getElementById('2411.05010v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04558">arXiv:2411.04558</a> <span> [<a href="https://arxiv.org/pdf/2411.04558">pdf</a>, <a href="https://arxiv.org/format/2411.04558">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Experimental Secure Multiparty Computation from Quantum Oblivious Transfer with Bit Commitment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kai-Yi Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+A">An-Jing Huang</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+K">Kun Tu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming-Han Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chi Zhang</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+W">Wei Qi</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Ya-Dong Wu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yu Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04558v1-abstract-short" style="display: inline;"> Secure multiparty computation enables collaborative computations across multiple users while preserving individual privacy, which has a wide range of applications in finance, machine learning and healthcare. Secure multiparty computation can be realized using oblivious transfer as a primitive function. In this paper, we present an experimental implementation of a quantum-secure quantum oblivious t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04558v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04558v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04558v1-abstract-full" style="display: none;"> Secure multiparty computation enables collaborative computations across multiple users while preserving individual privacy, which has a wide range of applications in finance, machine learning and healthcare. Secure multiparty computation can be realized using oblivious transfer as a primitive function. In this paper, we present an experimental implementation of a quantum-secure quantum oblivious transfer (QOT) protocol using an adapted quantum key distribution system combined with a bit commitment scheme, surpassing previous approaches only secure in the noisy storage model. 
We demonstrate the first practical application of the QOT protocol by solving the private set intersection, a prime example of secure multiparty computation, where two parties aim to find common elements in their datasets without revealing any other information. In our experiments, two banks can identify common suspicious accounts without disclosing any other data. This not only proves the experimental functionality of QOT, but also showcases its real-world commercial applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04558v1-abstract-full').style.display = 'none'; document.getElementById('2411.04558v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04476">arXiv:2411.04476</a> <span> [<a href="https://arxiv.org/pdf/2411.04476">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LLM-R: A Framework for Domain-Adaptive Maintenance Scheme Generation Combining Hierarchical Agents and RAG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tao%2C+L">Laifa Tao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xianjun Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Weiwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yunlong Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bin Li</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chen Lu</a>, <a href="/search/cs?searchtype=author&query=Hai%2C+X">Xingshuo Hai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04476v1-abstract-short" style="display: inline;"> The increasing use of smart devices has emphasized the critical role of maintenance in production activities. Interactive Electronic Technical Manuals (IETMs) are vital tools that support the maintenance of smart equipment. However, traditional IETMs face challenges such as transitioning from Graphical User Interfaces (GUIs) to natural Language User Interfaces (LUIs) and managing complex logical r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04476v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04476v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04476v1-abstract-full" style="display: none;"> The increasing use of smart devices has emphasized the critical role of maintenance in production activities. Interactive Electronic Technical Manuals (IETMs) are vital tools that support the maintenance of smart equipment. However, traditional IETMs face challenges such as transitioning from Graphical User Interfaces (GUIs) to natural Language User Interfaces (LUIs) and managing complex logical relationships. 
Additionally, they must meet the current demands for higher intelligence. This paper proposes a Maintenance Scheme Generation Method based on Large Language Models (LLM-R). The proposed method includes several key innovations. First, we propose the Low-Rank Adaptation-Knowledge Retention (LORA-KR) loss technique to proportionally adjust mixed maintenance data for fine-tuning the LLM. This method prevents knowledge conflicts caused by mixed data, improving the model's adaptability and reasoning ability in specific maintenance domains. In addition, Hierarchical Task-Based Agent and Instruction-level Retrieval-Augmented Generation (RAG) technologies are adopted to optimize the generation steps and mitigate the phenomenon of hallucination caused by the model's inability to access contextual information. This enhancement improves the model's flexibility and accuracy in handling known or unknown maintenance objects and maintenance scheme scenarios. To validate the proposed method's effectiveness in maintenance tasks, a maintenance scheme dataset was constructed using objects from different fields. The experimental results show that the accuracy of the maintenance schemes generated by the proposed method reached 91.59%, an improvement that enhances the intelligence of maintenance schemes and introduces novel technical approaches for equipment maintenance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04476v1-abstract-full').style.display = 'none'; document.getElementById('2411.04476v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03862">arXiv:2411.03862</a> <span> [<a href="https://arxiv.org/pdf/2411.03862">pdf</a>, <a href="https://arxiv.org/format/2411.03862">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> ROBIN: Robust and Invisible Watermarks for Diffusion Models with Adversarial Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+H">Huayang Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03862v1-abstract-short" style="display: inline;"> Watermarking generative content serves as a vital tool for authentication, ownership protection, and mitigation of potential misuse. Existing watermarking methods face the challenge of balancing robustness and concealment. 
They empirically inject a watermark intended to be both invisible and robust, and passively achieve concealment by limiting the watermark's strength, which in turn reduces robustness.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03862v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03862v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03862v1-abstract-full" style="display: none;"> Watermarking generative content serves as a vital tool for authentication, ownership protection, and mitigation of potential misuse. Existing watermarking methods face the challenge of balancing robustness and concealment. They empirically inject a watermark intended to be both invisible and robust, and passively achieve concealment by limiting the watermark's strength, which in turn reduces robustness. In this paper, we propose to explicitly introduce a watermark hiding process to actively achieve concealment, thus allowing the embedding of stronger watermarks. To be specific, we implant a robust watermark in an intermediate diffusion state and then guide the model to hide the watermark in the final generated image. We employ an adversarial optimization algorithm to produce the optimal hiding prompt guiding signal for each watermark. The prompt embedding is optimized to minimize artifacts in the generated image, while the watermark is optimized to achieve maximum strength. The watermark can be verified by reversing the generation process. Experiments on various diffusion models demonstrate that the watermark remains verifiable even under significant image tampering and shows superior invisibility compared to other state-of-the-art robust watermarking methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03862v1-abstract-full').style.display = 'none'; document.getElementById('2411.03862v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
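<p>A hedged sketch of the two-objective adversarial optimization described above: a hiding-prompt embedding is tuned to suppress image artifacts while the watermark signal is pushed toward maximum strength. <code>generate</code>, <code>artifact_loss</code>, and <code>wm_strength</code> are toy stand-ins for the diffusion pipeline, not the paper's implementation.</p> <pre><code>
import torch

def generate(emb, wm):                   # toy stand-in for diffusion sampling
    return torch.tanh(emb.mean() + wm)

def artifact_loss(img):                  # toy stand-in for a perceptual penalty
    return (img ** 2).mean()

def wm_strength(img, wm):                # toy stand-in for watermark detectability
    return (img * wm).mean()

emb = torch.randn(1, 77, 768, requires_grad=True)    # hiding-prompt embedding
wm = torch.randn(1, 4, 64, 64, requires_grad=True)   # watermark in latent space
opt = torch.optim.Adam([emb, wm], lr=1e-2)

for _ in range(200):
    img = generate(emb, wm)
    loss = artifact_loss(img) - 0.1 * wm_strength(img, wm)   # hide vs. strengthen
    opt.zero_grad(); loss.backward(); opt.step()
</code></pre>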
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accept to NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03644">arXiv:2411.03644</a> <span> [<a href="https://arxiv.org/pdf/2411.03644">pdf</a>, <a href="https://arxiv.org/format/2411.03644">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Deploying Multi-task Online Server with Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qu%2C+Y">Yincen Qu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chao Ma</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+X">Xiangying Dai</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yiting Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hengyue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03644v2-abstract-short" style="display: inline;"> In the industry, numerous tasks are deployed online. Traditional approaches often tackle each task separately by its own network, which leads to excessive costs for developing and scaling models, especially in the context of large language models. Although multi-task methods can save costs through parameter sharing, they often struggle to outperform single-task methods in real-world applications.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03644v2-abstract-full').style.display = 'inline'; document.getElementById('2411.03644v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03644v2-abstract-full" style="display: none;"> In the industry, numerous tasks are deployed online. Traditional approaches often tackle each task separately by its own network, which leads to excessive costs for developing and scaling models, especially in the context of large language models. Although multi-task methods can save costs through parameter sharing, they often struggle to outperform single-task methods in real-world applications. To tackle these challenges, we present a three-stage multi-task learning framework for large language models. It involves task filtering, followed by fine-tuning on high-resource tasks, and finally fine-tuning on all tasks. We conducted comprehensive experiments in single-task and multi-task settings. Our approach, exemplified on different benchmarks, demonstrates that it is able to achieve performance comparable to the single-task method while reducing up to 90.9\% of its overhead. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03644v2-abstract-full').style.display = 'none'; document.getElementById('2411.03644v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by COLING 2025 Industry Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02999">arXiv:2411.02999</a> <span> [<a href="https://arxiv.org/pdf/2411.02999">pdf</a>, <a href="https://arxiv.org/format/2411.02999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Precise Drive with VLM: First Prize Solution for PRCV 2024 Drive LM challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+B">Bin Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Siyu Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuanpeng Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yidan Wu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Hui Song</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zifan Ding</a>, <a href="/search/cs?searchtype=author&query=Leng%2C+J">Jing Leng</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C">Chengpeng Liang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+P">Peng Xue</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+T">Tiankun Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02999v1-abstract-short" style="display: inline;"> This technical report outlines the methodologies we applied for the PRCV Challenge, focusing on cognition and decision-making in driving scenarios. We employed InternVL-2.0, a pioneering open-source multi-modal model, and enhanced it by refining both the model input and training methodologies. For the input data, we strategically concatenated and formatted the multi-view images. It is worth mentio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02999v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02999v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02999v1-abstract-full" style="display: none;"> This technical report outlines the methodologies we applied for the PRCV Challenge, focusing on cognition and decision-making in driving scenarios. We employed InternVL-2.0, a pioneering open-source multi-modal model, and enhanced it by refining both the model input and training methodologies. For the input data, we strategically concatenated and formatted the multi-view images. 
It is worth mentioning that we utilized the coordinates of the original images without transformation. In terms of model training, we initially pre-trained the model on publicly available autonomous driving scenario datasets to bolster its alignment with the challenge tasks, followed by fine-tuning on the DriveLM-nuscenes Dataset. During the fine-tuning phase, we innovatively modified the loss function to enhance the model's precision in predicting coordinate values. These approaches ensure that our model possesses advanced cognitive and decision-making capabilities in driving scenarios. Consequently, our model achieved a score of 0.6064, securing first prize in the competition's final results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02999v1-abstract-full').style.display = 'none'; document.getElementById('2411.02999v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02902">arXiv:2411.02902</a> <span> [<a href="https://arxiv.org/pdf/2411.02902">pdf</a>, <a href="https://arxiv.org/format/2411.02902">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Membership Inference Attacks against Large Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhan Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yongtao Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yihang Chen</a>, <a href="/search/cs?searchtype=author&query=Tonin%2C+F">Francesco Tonin</a>, <a href="/search/cs?searchtype=author&query=Rocamora%2C+E+A">Elias Abad Rocamora</a>, <a href="/search/cs?searchtype=author&query=Cevher%2C+V">Volkan Cevher</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02902v1-abstract-short" style="display: inline;"> Large vision-language models (VLLMs) exhibit promising capabilities for processing multi-modal tasks across various application scenarios. However, their emergence also raises significant data security concerns, given the potential inclusion of sensitive information, such as private photos and medical records, in their training datasets. 
Detecting inappropriately used data in VLLMs remains a criti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02902v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02902v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02902v1-abstract-full" style="display: none;"> Large vision-language models (VLLMs) exhibit promising capabilities for processing multi-modal tasks across various application scenarios. However, their emergence also raises significant data security concerns, given the potential inclusion of sensitive information, such as private photos and medical records, in their training datasets. Detecting inappropriately used data in VLLMs remains a critical and unresolved issue, mainly due to the lack of standardized datasets and suitable methodologies. In this study, we introduce the first membership inference attack (MIA) benchmark tailored for various VLLMs to facilitate training data detection. Then, we propose a novel MIA pipeline specifically designed for token-level image detection. Lastly, we present a new metric called MaxRényi-K%, which is based on the confidence of the model output and applies to both text and image data. We believe that our work can deepen the understanding and methodology of MIAs in the context of VLLMs. Our code and datasets are available at https://github.com/LIONS-EPFL/VL-MIA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02902v1-abstract-full').style.display = 'none'; document.getElementById('2411.02902v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02829">arXiv:2411.02829</a> <span> [<a href="https://arxiv.org/pdf/2411.02829">pdf</a>, <a href="https://arxiv.org/format/2411.02829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CE-CoLLM: Efficient and Adaptive Large Language Models Through Cloud-Edge Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+H">Hongpeng Jin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yanzhao Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02829v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved remarkable success in serving end-users with human-like intelligence. 
However, LLMs demand high computational resources, making it challenging to deploy them to satisfy various performance objectives, such as meeting the resource constraints on edge devices close to end-users or achieving high accuracy with ample resources. In this paper, we introduce CE-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02829v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02829v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02829v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved remarkable success in serving end-users with human-like intelligence. However, LLMs demand high computational resources, making it challenging to deploy them to satisfy various performance objectives, such as meeting the resource constraints on edge devices close to end-users or achieving high accuracy with ample resources. In this paper, we introduce CE-CoLLM, a novel cloud-edge collaboration framework that supports efficient and adaptive LLM inference for end-users at the edge with two modes, (1) low-latency edge standalone inference and (2) highly accurate cloud-edge collaborative inference. First, we show that the inherent high communication costs for transmitting LLM contextual information between the edge and cloud dominate the overall latency, making it inefficient and costly to deploy LLMs using cloud-edge collaboration. Second, we propose several critical techniques to address this challenge, including early-exit mechanism, cloud context manager, and quantization in cloud-edge collaboration to enable not only low-latency standalone edge inference but also efficient and adaptive cloud-edge collaborative inference for LLMs. Third, we perform comprehensive experimental analysis, which demonstrates that CE-CoLLM significantly reduces inference time by up to 13.81% and cloud computation costs by up to 84.55% compared to the popular cloud-based LLM deployment, while maintaining comparable model accuracy. The proposed approach effectively shifts the computational load to the edge, reduces the communication overhead, scales efficiently with multiple edge clients, and provides reliable LLM deployment using cloud-edge collaboration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02829v1-abstract-full').style.display = 'none'; document.getElementById('2411.02829v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
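<p>The two inference modes described above boil down to a confidence-gated routing rule; here is a minimal sketch in which the edge model answers alone when confident and otherwise forwards its draft as context to the cloud. All names and the threshold are illustrative, not CE-CoLLM's actual interfaces.</p> <pre><code>
def infer(prompt, edge_model, cloud_model, tau=0.9):
    draft, confidence = edge_model(prompt)               # low-latency standalone path
    if confidence >= tau:
        return draft, "edge"
    return cloud_model(prompt, context=draft), "cloud"   # collaborative path

# toy usage with stand-in models
edge = lambda p: ("draft answer", 0.62)
cloud = lambda p, context: "refined answer using " + context
print(infer("summarize the report", edge, cloud))
</code></pre>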
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02272">arXiv:2411.02272</a> <span> [<a href="https://arxiv.org/pdf/2411.02272">pdf</a>, <a href="https://arxiv.org/format/2411.02272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Combining Induction and Transduction for Abstract Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+W">Wen-Ding Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+K">Keya Hu</a>, <a href="/search/cs?searchtype=author&query=Larsen%2C+C">Carter Larsen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yuqing Wu</a>, <a href="/search/cs?searchtype=author&query=Alford%2C+S">Simon Alford</a>, <a href="/search/cs?searchtype=author&query=Woo%2C+C">Caleb Woo</a>, <a href="/search/cs?searchtype=author&query=Dunn%2C+S+M">Spencer M. Dunn</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/cs?searchtype=author&query=Naim%2C+M">Michelangelo Naim</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+D">Dat Nguyen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+W">Wei-Long Zheng</a>, <a href="/search/cs?searchtype=author&query=Tavares%2C+Z">Zenna Tavares</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+Y">Yewen Pu</a>, <a href="/search/cs?searchtype=author&query=Ellis%2C+K">Kevin Ellis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02272v3-abstract-short" style="display: inline;"> When learning an input-output mapping from very few examples, is it better to first infer a latent function that explains the examples, or is it better to directly predict new test outputs, e.g. using a neural network? We study this question on ARC, a highly diverse dataset of abstract reasoning tasks. We train neural models for induction (inferring latent functions) and transduction (directly pre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02272v3-abstract-full').style.display = 'inline'; document.getElementById('2411.02272v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02272v3-abstract-full" style="display: none;"> When learning an input-output mapping from very few examples, is it better to first infer a latent function that explains the examples, or is it better to directly predict new test outputs, e.g. using a neural network? We study this question on ARC, a highly diverse dataset of abstract reasoning tasks. We train neural models for induction (inferring latent functions) and transduction (directly predicting the test output for a given test input). Our models are trained on synthetic data generated by prompting LLMs to produce Python code specifying a function to be inferred, plus a stochastic subroutine for generating inputs to that function. 
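<p>The synthetic-data recipe described above pairs an LLM-written latent function with a stochastic input-generating subroutine; the toy example below shows the shape of one such training item (the task itself is invented here).</p> <pre><code>
import random

def latent_fn(grid):                        # function the model must infer
    return [row[::-1] for row in grid]      # e.g. mirror each row

def gen_input():                            # stochastic input-generating subroutine
    return [[random.randint(0, 9) for _ in range(4)] for _ in range(3)]

examples = [(x, latent_fn(x)) for x in (gen_input() for _ in range(3))]
# induction: recover latent_fn from `examples`; transduction: predict the
# output for a new test input directly, without writing the function down.
</code></pre>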
arXiv:2411.02117 (https://arxiv.org/abs/2411.02117) [pdf, other] cs.CL
AVSS: Layer Importance Evaluation in Large Language Models via Activation Variance-Sparsity Analysis
Authors: Zichen Song, Yuxin Wu, Sitan Huang, Zhongfeng Kang
Abstract: The evaluation of layer importance in deep learning has been an active area of research, with significant implications for model optimization and interpretability. Recently, large language models (LLMs) have gained prominence across various domains, yet limited studies have explored the functional importance and performance contributions of individual layers within LLMs, especially from the perspective of activation distribution. In this work, we propose the Activation Variance-Sparsity Score (AVSS), a novel metric combining normalized activation variance and sparsity to assess each layer's contribution to model performance. By identifying and removing approximately the lowest 25% of layers based on AVSS, we achieve over 90% of original model performance across tasks such as question answering, language modeling, and sentiment classification, indicating that these layers may be non-essential. Our approach provides a systematic method for identifying less critical layers, contributing to efficient large language model architectures.
Submitted 4 November, 2024; originally announced November 2024.
Comments: 4 pages, 1 figure
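The abstract does not give the exact formula, but the metric plausibly reduces to a per-layer score like the following, with the lowest-scoring quartile flagged for removal (the combination rule and the near-zero cutoff are guesses on my part):

    import torch

    def layers_to_prune(activations, drop_fraction=0.25, eps=1e-3):
        """activations: dict mapping layer name -> activation tensor from a probe set."""
        scores = {}
        for name, act in activations.items():
            variance = act.var().item()
            sparsity = (act.abs() < eps).float().mean().item()  # fraction of near-zero activations
            scores[name] = variance * (1.0 - sparsity)          # assumed combination rule
        total = sum(scores.values()) or 1.0
        scores = {name: s / total for name, s in scores.items()}  # normalize across layers
        k = max(1, int(len(scores) * drop_fraction))
        return sorted(scores, key=scores.get)[:k]                 # lowest ~25% by AVSS-like score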
arXiv:2411.02028 (https://arxiv.org/abs/2411.02028) [pdf] cs.RO
An Immediate Update Strategy of Multi-State Constraint Kalman Filter
Authors: Qingchao Zhang, Wei Ouyang, Jiale Han, Qi Cai, Maoran Zhu, Yuanxin Wu
Abstract: The lightweight Multi-state Constraint Kalman Filter (MSCKF) has been well known for its high efficiency, and the delayed update has usually been adopted since its proposal. This work investigates the immediate update strategy of MSCKF based on timely reconstructed 3D feature points and measurement constraints. The differences between the delayed update and the immediate update are theoretically analyzed in detail. It is found that the immediate update helps construct more observation constraints and employ more filtering updates than the delayed update, which improves the linearization point of the measurement model and therefore enhances the estimation accuracy. Numerical simulations and experiments show that the immediate update strategy significantly enhances MSCKF even with a small number of feature observations.
Submitted 4 November, 2024; originally announced November 2024.
Comments: 8 pages, 5 figures
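Schematically, the two strategies differ only in when a feature's constraints enter the filter. A toy contrast follows; the helpers triangulate and reprojection_constraints and the track API are placeholders, not the paper's code:

    def delayed_update(ekf, tracks):
        # Classic MSCKF: a feature contributes only once its track terminates.
        for track in tracks:
            if track.is_finished():
                point = triangulate(track.observations)
                ekf.update(reprojection_constraints(point, track.observations))

    def immediate_update(ekf, tracks, min_obs=3):
        # Immediate strategy: reconstruct the 3D point as soon as enough views
        # exist and update right away, so later linearizations use a better state.
        for track in tracks:
            if len(track.observations) >= min_obs:
                point = triangulate(track.observations)
                ekf.update(reprojection_constraints(point, track.observations[-1:]))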
arXiv:2411.01545 (https://arxiv.org/abs/2411.01545) [pdf, other] cs.CV, doi: 10.1145/3664647 (https://doi.org/10.1145/3664647)
Towards Small Object Editing: A Benchmark Dataset and A Training-Free Approach
Authors: Qihe Pan, Zhen Zhao, Zicheng Wang, Sifan Long, Yiming Wu, Wei Ji, Haoran Liang, Ronghua Liang
Abstract: A plethora of text-guided image editing methods has recently been developed by leveraging the impressive capabilities of large-scale diffusion-based generative models, especially Stable Diffusion. Despite the success of diffusion models in producing high-quality images, their application to small object generation has been limited due to difficulties in aligning cross-modal attention maps between text and these objects. Our approach offers a training-free method that significantly mitigates this alignment issue with local and global attention guidance, enhancing the model's ability to accurately render small objects in accordance with textual descriptions. We detail the methodology of our approach, emphasizing its divergence from traditional generation techniques and highlighting its advantages. More importantly, we also provide SOEBench (Small Object Editing), a standardized benchmark for quantitatively evaluating text-based small object generation, collected from MSCOCO and OpenImage. Preliminary results demonstrate the effectiveness of our method, showing marked improvements in the fidelity and accuracy of small object generation compared to existing models. This advancement not only contributes to the field of AI and computer vision but also opens up new possibilities for applications in various industries where precise image generation is critical. We will release our dataset on our project page: https://soebench.github.io/.
Submitted 3 November, 2024; originally announced November 2024.
Comments: 9 pages, 8 figures, Accepted by ACMMM 2024
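The paper's exact guidance terms are not spelled out in the abstract; a generic form of local attention guidance might boost the object's text tokens over its target region in each cross-attention layer, roughly as follows (the shapes, gain, and masking scheme are assumptions):

    import math
    import torch

    def locally_guided_attention(attn_logits, object_token_ids, region_mask, gain=2.0):
        """attn_logits: (batch, n_image_patches, n_text_tokens) pre-softmax scores;
        region_mask: bool tensor (n_image_patches,) marking the small object's area."""
        guided = attn_logits.clone()
        rows = region_mask.nonzero(as_tuple=True)[0]   # image patches inside the region
        cols = torch.tensor(object_token_ids)
        # Raise the object's tokens inside its region so attention mass does not
        # diffuse away from the small target.
        guided[:, rows.unsqueeze(1), cols] += math.log(gain)
        return guided.softmax(dim=-1)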
arXiv:2411.01371 (https://arxiv.org/abs/2411.01371) [pdf, other] cs.LG stat.ML
Network Causal Effect Estimation In Graphical Models Of Contagion And Latent Confounding
Authors: Yufeng Wu, Rohit Bhattacharya
Abstract: A key question in many network studies is whether the observed correlations between units are primarily due to contagion or latent confounding. Here, we study this question using a segregated graph (Shpitser, 2015) representation of these mechanisms, and examine how uncertainty about the true underlying mechanism impacts downstream computation of network causal effects, particularly under full interference -- settings where we only have a single realization of a network and each unit may depend on any other unit in the network. Under certain assumptions about asymptotic growth of the network, we derive likelihood ratio tests that can be used to identify whether different sets of variables -- confounders, treatments, and outcomes -- across units exhibit dependence due to contagion or latent confounding. We then propose network causal effect estimation strategies that provide unbiased and consistent estimates if the dependence mechanisms are either known or correctly inferred using our proposed tests. Together, the proposed methods allow network effect estimation in a wider range of full interference scenarios that have not been considered in prior work. We evaluate the effectiveness of our methods with synthetic data and the validity of our assumptions using real-world networks.
Submitted 2 November, 2024; originally announced November 2024.
Comments: 27 pages
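The paper's tests are tailored to network dependence under asymptotic growth assumptions; the generic likelihood-ratio machinery they build on looks like this (the model-fitting step and the chi-squared null are simplifications of what the paper actually derives):

    from scipy.stats import chi2

    def likelihood_ratio_test(loglik_restricted, loglik_full, extra_params):
        """Compare, e.g., a latent-confounding-only model (restricted) against one
        that also allows contagion (full); extra_params = added degrees of freedom."""
        stat = 2.0 * (loglik_full - loglik_restricted)
        p_value = chi2.sf(stat, df=extra_params)   # small p favors the full model
        return stat, p_value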
arXiv:2411.01327 (https://arxiv.org/abs/2411.01327) [pdf, other] cs.CV cs.AI
Visual Fourier Prompt Tuning
Authors: Runjia Zeng, Cheng Han, Qifan Wang, Chunshu Wu, Tong Geng, Lifu Huang, Ying Nian Wu, Dongfang Liu
Abstract: With the scale of vision Transformer-based models continuing to grow, finetuning these large-scale pretrained models for new tasks has become increasingly parameter-intensive. Visual prompt tuning is introduced as a parameter-efficient finetuning (PEFT) method in response to this trend. Despite its successes, a notable research challenge persists within almost all PEFT approaches: significant performance degradation is observed when there is a substantial disparity between the datasets used in the pretraining and finetuning phases. To address this challenge, we draw inspiration from human visual cognition and propose the Visual Fourier Prompt Tuning (VFPT) method as a general and effective solution for adapting large-scale transformer-based models. Our approach innovatively incorporates the Fast Fourier Transform into prompt embeddings and harmoniously considers both spatial and frequency domain information. Apart from its inherent simplicity and intuitiveness, VFPT exhibits superior performance across all datasets, offering a general solution to dataset challenges, irrespective of data disparities. Empirical results demonstrate that our approach outperforms current state-of-the-art baselines on two benchmarks, with low parameter usage (e.g., 0.57% of model parameters on VTAB-1k) and notable performance enhancements (e.g., 73.20% mean accuracy on VTAB-1k). Our code is available at https://github.com/runtsang/VFPT.
Submitted 15 November, 2024; v1 submitted 2 November, 2024; originally announced November 2024.
Comments: [NeurIPS 2024] Homepage: https://runjia.tech/vfpt_page/
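A minimal sketch of the core idea: prepend learnable prompt tokens, some of which are viewed in the frequency domain via an FFT (how many tokens get transformed, and taking the real part, are assumptions layered on top of the abstract):

    import torch
    import torch.nn as nn

    class FourierPrompts(nn.Module):
        def __init__(self, n_prompts, dim, n_fourier):
            super().__init__()
            self.prompts = nn.Parameter(torch.randn(n_prompts, dim) * 0.02)
            self.n_fourier = n_fourier  # prompts routed through the FFT branch

        def forward(self, patch_tokens):                      # (batch, n_patches, dim)
            spatial = self.prompts[self.n_fourier:]
            frequency = torch.fft.fft(self.prompts[:self.n_fourier], dim=-1).real
            prompts = torch.cat([frequency, spatial], dim=0)  # (n_prompts, dim)
            prompts = prompts.unsqueeze(0).expand(patch_tokens.size(0), -1, -1)
            return torch.cat([prompts, patch_tokens], dim=1)  # prepend to ViT tokens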
arXiv:2411.01225 (https://arxiv.org/abs/2411.01225) [pdf, other] cs.CV
RLE: A Unified Perspective of Data Augmentation for Cross-Spectral Re-identification
Authors: Lei Tan, Yukang Zhang, Keke Han, Pingyang Dai, Yan Zhang, Yongjian Wu, Rongrong Ji
Abstract: This paper makes a step towards modeling the modality discrepancy in the cross-spectral re-identification task. Based on the Lambertian model, we observe that the non-linear modality discrepancy mainly comes from diverse linear transformations acting on the surface of different materials. From this view, we unify all data augmentation strategies for cross-spectral re-identification by mimicking such local linear transformations and categorizing them into moderate transformation and radical transformation. By extending the observation, we propose a Random Linear Enhancement (RLE) strategy which includes Moderate Random Linear Enhancement (MRLE) and Radical Random Linear Enhancement (RRLE) to push the boundaries of both types of transformation. Moderate Random Linear Enhancement is designed to provide diverse image transformations that satisfy the original linear correlations under constrained conditions, whereas Radical Random Linear Enhancement seeks to generate local linear transformations directly without relying on external information. The experimental results not only demonstrate the superiority and effectiveness of RLE but also confirm its great potential as a general-purpose data augmentation for cross-spectral re-identification. The code is available at https://github.com/stone96123/RLE.
Submitted 2 November, 2024; originally announced November 2024.
Comments: Accepted to NeurIPS 2024
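For intuition, the "radical" variant might look like the following patchwise random linear map; the parameter ranges and patch grid are invented here, and the paper's constrained "moderate" variant additionally preserves the original linear correlations:

    import torch

    def radical_random_linear_enhancement(img, grid=4):
        """img: (channels, H, W) in [0, 1]; applies an independent y = a*x + b per patch."""
        c, h, w = img.shape
        ph, pw = h // grid, w // grid
        out = img.clone()
        for i in range(0, ph * grid, ph):
            for j in range(0, pw * grid, pw):
                a = torch.empty(1).uniform_(0.5, 1.5)   # random local gain
                b = torch.empty(1).uniform_(-0.1, 0.1)  # random local offset
                out[:, i:i + ph, j:j + pw] = (a * img[:, i:i + ph, j:j + pw] + b).clamp(0, 1)
        return out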
arXiv:2411.01171 (https://arxiv.org/abs/2411.01171) [pdf, other] cs.CV cs.AI
Fast and Memory-Efficient Video Diffusion Using Streamlined Inference
Authors: Zheng Zhan, Yushu Wu, Yifan Gong, Zichong Meng, Zhenglun Kong, Changdi Yang, Geng Yuan, Pu Zhao, Wei Niu, Yanzhi Wang
Abstract: The rapid progress in artificial intelligence-generated content (AIGC), especially with diffusion models, has significantly advanced the development of high-quality video generation. However, current video diffusion models exhibit demanding computational requirements and high peak memory usage, especially for generating longer and higher-resolution videos. These limitations greatly hinder the practical application of video diffusion models on standard hardware platforms. To tackle this issue, we present a novel, training-free framework named Streamlined Inference, which leverages the temporal and spatial properties of video diffusion models. Our approach integrates three core components: Feature Slicer, Operator Grouping, and Step Rehash. Specifically, Feature Slicer effectively partitions input features into sub-features, and Operator Grouping processes each sub-feature with a group of consecutive operators, resulting in significant memory reduction without sacrificing quality or speed. Step Rehash further exploits the similarity between adjacent steps in diffusion, and accelerates inference by skipping unnecessary steps. Extensive experiments demonstrate that our approach significantly reduces peak memory and computational overhead, making it feasible to generate high-quality videos on a single consumer GPU (e.g., reducing the peak memory of AnimateDiff from 42GB to 11GB, with faster inference on a 2080Ti).
Submitted 2 November, 2024; originally announced November 2024.
Comments: Accepted to NeurIPS 2024
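The three components compose naturally. An idealized sketch follows; the chunking axis, group boundaries, and similarity threshold are all guesses rather than the paper's settings:

    import torch
    import torch.nn.functional as F

    def grouped_sliced_forward(operator_group, features, n_slices=4):
        # Feature Slicer + Operator Grouping: push one sub-feature through a group
        # of consecutive operators at a time, so only a slice is live at peak memory.
        outputs = []
        for sub in features.chunk(n_slices, dim=0):
            for op in operator_group:
                sub = op(sub)
            outputs.append(sub)
        return torch.cat(outputs, dim=0)

    def can_rehash(prev_step_feat, curr_step_feat, threshold=0.995):
        # Step Rehash: when two adjacent denoising steps yield near-identical
        # features, reuse the cached result instead of recomputing the next step.
        sim = F.cosine_similarity(prev_step_feat.flatten(), curr_step_feat.flatten(), dim=0)
        return sim.item() > threshold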
arXiv:2411.00399 (https://arxiv.org/abs/2411.00399) [pdf, other] cs.CV cs.GR
StyleTex: Style Image-Guided Texture Generation for 3D Models
Authors: Zhiyu Xie, Yuqing Zhang, Xiangjun Tang, Yiqian Wu, Dehan Chen, Gongsheng Li, Xiaogang Jin
Abstract: Style-guided texture generation aims to generate a texture that is harmonious with both the style of the reference image and the geometry of the input mesh, given a reference style image and a 3D mesh with its text description. Although diffusion-based 3D texture generation methods, such as distillation sampling, have numerous promising applications in stylized games and films, two challenges must be addressed: (1) completely decoupling style and content from the reference image for 3D models, and (2) aligning the generated texture with the color tone and style of the reference image and with the given text prompt. To this end, we introduce StyleTex, an innovative diffusion-model-based framework for creating stylized textures for 3D models. Our key insight is to decouple style information from the reference image while disregarding content in diffusion-based distillation sampling. Specifically, given a reference image, we first decompose its style feature from the image CLIP embedding by subtracting the embedding's orthogonal projection in the direction of the content feature, which is represented by a text CLIP embedding. Our novel approach to disentangling the reference image's style and content information allows us to generate distinct style and content features. We then inject the style feature into the cross-attention mechanism to incorporate it into the generation process, while utilizing the content feature as a negative prompt to further dissociate content information. Finally, we incorporate these strategies into StyleTex to obtain stylized textures. The resulting textures generated by StyleTex retain the style of the reference image, while also aligning with the text prompts and intrinsic details of the given 3D mesh. Quantitative and qualitative experiments show that our method outperforms existing baseline methods by a significant margin.
Submitted 1 November, 2024; originally announced November 2024.
Comments: Accepted to SIGGRAPH Asia 2024
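The decomposition described in the abstract is a plain orthogonal projection in CLIP space, which can be written directly (variable names are mine; 1-D embeddings assumed):

    import torch

    def decompose_style(image_embedding, content_text_embedding):
        """Subtract the image embedding's projection onto the content (text) direction;
        the residual is taken as the style feature."""
        direction = content_text_embedding / content_text_embedding.norm()
        content_component = (image_embedding @ direction) * direction
        style_feature = image_embedding - content_component
        return style_feature, content_component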
arXiv:2411.00266 (https://arxiv.org/abs/2411.00266) [pdf] cs.LG
A Systematic Review of NeurIPS Dataset Management Practices
Authors: Yiwei Wu, Leah Ajmani, Shayne Longpre, Hanlin Li
Abstract: As new machine learning methods demand larger training datasets, researchers and developers face significant challenges in dataset management. Although ethics reviews, documentation, and checklists have been established, it remains uncertain whether consistent dataset management practices exist across the community. This lack of a comprehensive overview hinders our ability to diagnose and address fundamental tensions and ethical issues related to managing large datasets. We present a systematic review of datasets published at the NeurIPS Datasets and Benchmarks track, focusing on four key aspects: provenance, distribution, ethical disclosure, and licensing. Our findings reveal that dataset provenance is often unclear due to ambiguous filtering and curation processes. Additionally, a variety of sites are used for dataset hosting, but only a few offer structured metadata and version control. These inconsistencies underscore the urgent need for standardized data infrastructures for the publication and management of datasets.
Submitted 31 October, 2024; originally announced November 2024.
Comments: 10 pages, 2 tables
arXiv:2410.23774 (https://arxiv.org/abs/2410.23774) [pdf, other] cs.LG
Towards Convexity in Anomaly Detection: A New Formulation of SSLM with Unique Optimal Solutions
Authors: Hongying Liu, Hao Wang, Haoran Chu, Yibo Wu
Abstract: An unsolved issue in widely used methods such as Support Vector Data Description (SVDD) and Small Sphere and Large Margin SVM (SSLM) for anomaly detection is their nonconvexity, which hampers the analysis of optimal solutions in a manner similar to SVMs and limits their applicability in large-scale scenarios. In this paper, we introduce a novel convex SSLM formulation which has been demonstrated to revert to a convex quadratic programming problem for hyperparameter values of interest. Leveraging the convexity of our method, we derive numerous results that are unattainable with traditional nonconvex approaches. We conduct a thorough analysis of how hyperparameters influence the optimal solution, pointing out scenarios where optimal solutions can be trivially found and identifying instances of ill-posedness. Most notably, we establish connections between our method and traditional approaches, providing a clear determination of when the optimal solution is unique -- a task unachievable with traditional nonconvex methods. We also derive the ν-property to elucidate the interactions between hyperparameters and the fractions of support vectors and margin errors in both positive and negative classes.
Submitted 31 October, 2024; originally announced October 2024.
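The abstract does not reproduce the new formulation, but the flavor of a convex sphere-based anomaly detector can be shown with a small program in the SVDD family (illustrative only; the paper's convex SSLM formulation differs and is not given here):

    import cvxpy as cp
    import numpy as np

    def fit_soft_sphere(X, C=1.0):
        """Minimal convex sphere-with-slack program; X is an (n, d) array of normal data."""
        n, d = X.shape
        center = cp.Variable(d)
        r2 = cp.Variable(nonneg=True)      # squared radius
        xi = cp.Variable(n, nonneg=True)   # slack for margin errors
        constraints = [cp.sum_squares(X[i] - center) <= r2 + xi[i] for i in range(n)]
        problem = cp.Problem(cp.Minimize(r2 + C * cp.sum(xi)), constraints)
        problem.solve()                    # convex, so the solver returns a global optimum
        return center.value, r2.value

    # Points with ||x - center||^2 > r2 would then be flagged as anomalies.
    X = np.random.randn(200, 2)
    c, r2 = fit_soft_sphere(X)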