Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–38 of 38 results for author: <span class="mathjax">Lv, K</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Lv%2C+K">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Lv, K"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lv%2C+K&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lv, K"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11011">arXiv:2411.11011</a> <span> [<a href="https://arxiv.org/pdf/2411.11011">pdf</a>, <a href="https://arxiv.org/format/2411.11011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CCi-YOLOv8n: Enhanced Fire Detection with CARAFE and Context-Guided Modules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kunwei Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11011v1-abstract-short" style="display: inline;"> Fire incidents in urban and forested areas pose serious threats,underscoring the need for more effective detection technologies. To address these challenges, we present CCi-YOLOv8n, an enhanced YOLOv8 model with targeted improvements for detecting small fires and smoke. The model integrates the CARAFE up-sampling operator and a context-guided module to reduce information loss during up-sampling an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11011v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11011v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11011v1-abstract-full" style="display: none;"> Fire incidents in urban and forested areas pose serious threats,underscoring the need for more effective detection technologies. To address these challenges, we present CCi-YOLOv8n, an enhanced YOLOv8 model with targeted improvements for detecting small fires and smoke. The model integrates the CARAFE up-sampling operator and a context-guided module to reduce information loss during up-sampling and down-sampling, thereby retaining richer feature representations. Additionally, an inverted residual mobile block enhanced C2f module captures small targets and fine smoke patterns, a critical improvement over the original model's detection capacity.For validation, we introduce Web-Fire, a dataset curated for fire and smoke detection across diverse real-world scenarios. Experimental results indicate that CCi-YOLOv8n outperforms YOLOv8n in detection precision, confirming its effectiveness for robust fire detection tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11011v1-abstract-full').style.display = 'none'; document.getElementById('2411.11011v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages,7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06044">arXiv:2410.06044</a> <span> [<a href="https://arxiv.org/pdf/2410.06044">pdf</a>, <a href="https://arxiv.org/format/2410.06044">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HyperDet: Generalizable Detection of Synthesized Images by Generating and Merging A Mixture of Hyper LoRAs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+H">Huangsen Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yongwei Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yinfeng Liu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Sixian Zheng</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangtao Lv</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhimeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bo Zhang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+X">Xin Ding</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fei Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06044v1-abstract-short" style="display: inline;"> The emergence of diverse generative vision models has recently enabled the synthesis of visually realistic images, underscoring the critical need for effectively detecting these generated images from real photos. Despite advances in this field, existing detection approaches often struggle to accurately identify synthesized images generated by different generative models. In this work, we introduce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06044v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06044v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06044v1-abstract-full" style="display: none;"> The emergence of diverse generative vision models has recently enabled the synthesis of visually realistic images, underscoring the critical need for effectively detecting these generated images from real photos. Despite advances in this field, existing detection approaches often struggle to accurately identify synthesized images generated by different generative models. In this work, we introduce a novel and generalizable detection framework termed HyperDet, which innovatively captures and integrates shared knowledge from a collection of functionally distinct and lightweight expert detectors. HyperDet leverages a large pretrained vision model to extract general detection features while simultaneously capturing and enhancing task-specific features. To achieve this, HyperDet first groups SRM filters into five distinct groups to efficiently capture varying levels of pixel artifacts based on their different functionality and complexity. Then, HyperDet utilizes a hypernetwork to generate LoRA model weights with distinct embedding parameters. Finally, we merge the LoRA networks to form an efficient model ensemble. Also, we propose a novel objective function that balances the pixel and semantic artifacts effectively. Extensive experiments on the UnivFD and Fake2M datasets demonstrate the effectiveness of our approach, achieving state-of-the-art performance. Moreover, our work paves a new way to establish generalizable domain-specific fake image detectors based on pretrained large vision models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06044v1-abstract-full').style.display = 'none'; document.getElementById('2410.06044v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05951">arXiv:2410.05951</a> <span> [<a href="https://arxiv.org/pdf/2410.05951">pdf</a>, <a href="https://arxiv.org/format/2410.05951">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hyper Adversarial Tuning for Boosting Adversarial Robustness of Pretrained Large Vision Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangtao Lv</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+H">Huangsen Cao</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+K">Kainan Tu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yihuai Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhimeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+X">Xin Ding</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yongwei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05951v1-abstract-short" style="display: inline;"> Large vision models have been found vulnerable to adversarial examples, emphasizing the need for enhancing their adversarial robustness. While adversarial training is an effective defense for deep convolutional models, it often faces scalability issues with large vision models due to high computational costs. Recent approaches propose robust fine-tuning methods, such as adversarial tuning of low-r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05951v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05951v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05951v1-abstract-full" style="display: none;"> Large vision models have been found vulnerable to adversarial examples, emphasizing the need for enhancing their adversarial robustness. While adversarial training is an effective defense for deep convolutional models, it often faces scalability issues with large vision models due to high computational costs. Recent approaches propose robust fine-tuning methods, such as adversarial tuning of low-rank adaptation (LoRA) in large vision models, but they still struggle to match the accuracy of full parameter adversarial fine-tuning. The integration of various defense mechanisms offers a promising approach to enhancing the robustness of large vision models, yet this paradigm remains underexplored. To address this, we propose hyper adversarial tuning (HyperAT), which leverages shared defensive knowledge among different methods to improve model robustness efficiently and effectively simultaneously. Specifically, adversarial tuning of each defense method is formulated as a learning task, and a hypernetwork generates LoRA specific to this defense. Then, a random sampling and tuning strategy is proposed to extract and facilitate the defensive knowledge transfer between different defenses. Finally, diverse LoRAs are merged to enhance the adversarial robustness. Experiments on various datasets and model architectures demonstrate that HyperAT significantly enhances the adversarial robustness of pretrained large vision models without excessive computational overhead, establishing a new state-of-the-art benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05951v1-abstract-full').style.display = 'none'; document.getElementById('2410.05951v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07395">arXiv:2408.07395</a> <span> [<a href="https://arxiv.org/pdf/2408.07395">pdf</a>, <a href="https://arxiv.org/format/2408.07395">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Improving Global Parameter-sharing in Physically Heterogeneous Multi-agent Reinforcement Learning with Unified Action Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaoyang Yu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Youfang Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Han%2C+S">Sheng Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07395v1-abstract-short" style="display: inline;"> In a multi-agent system (MAS), action semantics indicates the different influences of agents' actions toward other entities, and can be used to divide agents into groups in a physically heterogeneous MAS. Previous multi-agent reinforcement learning (MARL) algorithms apply global parameter-sharing across different types of heterogeneous agents without careful discrimination of different action sema… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07395v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07395v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07395v1-abstract-full" style="display: none;"> In a multi-agent system (MAS), action semantics indicates the different influences of agents' actions toward other entities, and can be used to divide agents into groups in a physically heterogeneous MAS. Previous multi-agent reinforcement learning (MARL) algorithms apply global parameter-sharing across different types of heterogeneous agents without careful discrimination of different action semantics. This common implementation decreases the cooperation and coordination between agents in complex situations. However, fully independent agent parameters dramatically increase the computational cost and training difficulty. In order to benefit from the usage of different action semantics while also maintaining a proper parameter-sharing structure, we introduce the Unified Action Space (UAS) to fulfill the requirement. The UAS is the union set of all agent actions with different semantics. All agents first calculate their unified representation in the UAS, and then generate their heterogeneous action policies using different available-action-masks. To further improve the training of extra UAS parameters, we introduce a Cross-Group Inverse (CGI) loss to predict other groups' agent policies with the trajectory information. As a universal method for solving the physically heterogeneous MARL problem, we implement the UAS adding to both value-based and policy-based MARL algorithms, and propose two practical algorithms: U-QMIX and U-MAPPO. Experimental results in the SMAC environment prove the effectiveness of both U-QMIX and U-MAPPO compared with several state-of-the-art MARL methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07395v1-abstract-full').style.display = 'none'; document.getElementById('2408.07395v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15176">arXiv:2407.15176</a> <span> [<a href="https://arxiv.org/pdf/2407.15176">pdf</a>, <a href="https://arxiv.org/format/2407.15176">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ReAttention: Training-Free Infinite Context with Finite Attention Scope </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaoran Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruixiao Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhigeng Liu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yuerong Song</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">Hang Yan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linlin Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qun Liu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15176v2-abstract-short" style="display: inline;"> The long-context capability of the Large Language Models (LLM) has made significant breakthroughs, but the maximum supported context length remains a critical bottleneck limiting their practical applications. The constraint of context length in LLMs arises from the self-attention mechanism, which cannot effectively and efficiently capture the semantic relationships within infinitely long contexts… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15176v2-abstract-full').style.display = 'inline'; document.getElementById('2407.15176v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15176v2-abstract-full" style="display: none;"> The long-context capability of the Large Language Models (LLM) has made significant breakthroughs, but the maximum supported context length remains a critical bottleneck limiting their practical applications. The constraint of context length in LLMs arises from the self-attention mechanism, which cannot effectively and efficiently capture the semantic relationships within infinitely long contexts via the limited pre-trained positional information and attention scope. In this work, we propose \textbf{ReAttention}, a training-free approach enabling LLM based on the self-attention mechanism to support an infinite context with a finite attention scope under sufficient memory resources. ReAttention performs the position-agnostic top-$k$ attention before the ordinary position-aware self-attention, freeing LLMs from the length extrapolation issue. We validate the performance of ReAttention on the LongBench, L-Eval, and InfiniteBench and demonstrate that it is on par with traditional methods. Furthermore, we also apply ReAttention on mainstream LLMs, including LLaMA3.1-8B and Mistral-v0.3-7B, enabling them to support context lengths of at least 1M and even expanding the context length of LLaMA3.2-3B-chat by 128$\times$ to 4M without any further training in Needle-In-A-Haystack tests. We also improve the efficiency of ReAttention with Triton and achieve an efficient extrapolation without additional overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15176v2-abstract-full').style.display = 'none'; document.getElementById('2407.15176v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18078">arXiv:2405.18078</a> <span> [<a href="https://arxiv.org/pdf/2405.18078">pdf</a>, <a href="https://arxiv.org/format/2405.18078">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Edge-guided and Class-balanced Active Learning for Semantic Segmentation of Aerial Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shan%2C+L">Lianlei Shan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weiqiang Wang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Ke Lv</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+B">Bin Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18078v1-abstract-short" style="display: inline;"> Semantic segmentation requires pixel-level annotation, which is time-consuming. Active Learning (AL) is a promising method for reducing data annotation costs. Due to the gap between aerial and natural images, the previous AL methods are not ideal, mainly caused by unreasonable labeling units and the neglect of class imbalance. Previous labeling units are based on images or regions, which does not… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18078v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18078v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18078v1-abstract-full" style="display: none;"> Semantic segmentation requires pixel-level annotation, which is time-consuming. Active Learning (AL) is a promising method for reducing data annotation costs. Due to the gap between aerial and natural images, the previous AL methods are not ideal, mainly caused by unreasonable labeling units and the neglect of class imbalance. Previous labeling units are based on images or regions, which does not consider the characteristics of segmentation tasks and aerial images, i.e., the segmentation network often makes mistakes in the edge region, and the edge of aerial images is often interlaced and irregular. Therefore, an edge-guided labeling unit is proposed and supplemented as the new unit. On the other hand, the class imbalance is severe, manifested in two aspects: the aerial image is seriously imbalanced, and the AL strategy does not fully consider the class balance. Both seriously affect the performance of AL in aerial images. We comprehensively ensure class balance from all steps that may occur imbalance, including initial labeled data, subsequent labeled data, and pseudo-labels. Through the two improvements, our method achieves more than 11.2\% gains compared to state-of-the-art methods on three benchmark datasets, Deepglobe, Potsdam, and Vaihingen, and more than 18.6\% gains compared to the baseline. Sufficient ablation studies show that every module is indispensable. Furthermore, we establish a fair and strong benchmark for future research on AL for aerial image segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18078v1-abstract-full').style.display = 'none'; document.getElementById('2405.18078v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.17297">arXiv:2403.17297</a> <span> [<a href="https://arxiv.org/pdf/2403.17297">pdf</a>, <a href="https://arxiv.org/format/2403.17297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> InternLM2 Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+Z">Zheng Cai</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+M">Maosong Cao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Haojiong Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xin Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xun Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zehui Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhi Chen</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+P">Pei Chu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+X">Xiaoyi Dong</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+H">Haodong Duan</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Q">Qi Fan</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+Z">Zhaoye Fei</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yang Gao</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+J">Jiaye Ge</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+C">Chenya Gu</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yuzhe Gu</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+T">Tao Gui</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+A">Aijia Guo</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&query=He%2C+C">Conghui He</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yingfan Hu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Ting Huang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+T">Tao Jiang</a> , et al. (75 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.17297v1-abstract-short" style="display: inline;"> The evolution of Large Language Models (LLMs) like ChatGPT and GPT-4 has sparked discussions on the advent of Artificial General Intelligence (AGI). However, replicating such advancements in open-source models has been challenging. This paper introduces InternLM2, an open-source LLM that outperforms its predecessors in comprehensive evaluations across 6 dimensions and 30 benchmarks, long-context m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17297v1-abstract-full').style.display = 'inline'; document.getElementById('2403.17297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.17297v1-abstract-full" style="display: none;"> The evolution of Large Language Models (LLMs) like ChatGPT and GPT-4 has sparked discussions on the advent of Artificial General Intelligence (AGI). However, replicating such advancements in open-source models has been challenging. This paper introduces InternLM2, an open-source LLM that outperforms its predecessors in comprehensive evaluations across 6 dimensions and 30 benchmarks, long-context modeling, and open-ended subjective evaluations through innovative pre-training and optimization techniques. The pre-training process of InternLM2 is meticulously detailed, highlighting the preparation of diverse data types including text, code, and long-context data. InternLM2 efficiently captures long-term dependencies, initially trained on 4k tokens before advancing to 32k tokens in pre-training and fine-tuning stages, exhibiting remarkable performance on the 200k ``Needle-in-a-Haystack" test. InternLM2 is further aligned using Supervised Fine-Tuning (SFT) and a novel Conditional Online Reinforcement Learning from Human Feedback (COOL RLHF) strategy that addresses conflicting human preferences and reward hacking. By releasing InternLM2 models in different training stages and model sizes, we provide the community with insights into the model's evolution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17297v1-abstract-full').style.display = 'none'; document.getElementById('2403.17297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13583">arXiv:2402.13583</a> <span> [<a href="https://arxiv.org/pdf/2402.13583">pdf</a>, <a href="https://arxiv.org/format/2402.13583">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LongWanjuan: Towards Systematic Measurement for Long Text Quality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaoran Liu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">Hang Yan</a>, <a href="/search/cs?searchtype=author&query=He%2C+C">Conghui He</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+D">Dahua Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13583v2-abstract-short" style="display: inline;"> The quality of training data are crucial for enhancing the long-text capabilities of foundation models. Despite existing efforts to refine data quality through heuristic rules and evaluations based on data diversity and difficulty, there's a lack of systematic approaches specifically tailored for assessing long texts. Addressing this gap, our work systematically measures the quality of long texts… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13583v2-abstract-full').style.display = 'inline'; document.getElementById('2402.13583v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13583v2-abstract-full" style="display: none;"> The quality of training data are crucial for enhancing the long-text capabilities of foundation models. Despite existing efforts to refine data quality through heuristic rules and evaluations based on data diversity and difficulty, there's a lack of systematic approaches specifically tailored for assessing long texts. Addressing this gap, our work systematically measures the quality of long texts by evaluating three fundamental linguistic dimensions: coherence, cohesion, and complexity. Drawing inspiration from the aforementioned three dimensions, we introduce a suite of metrics designed to evaluate the quality of long texts, encompassing both statistical and pre-trained language model-based ones. Leveraging these metrics, we present LongWanjuan, a bilingual dataset specifically tailored to enhance the training of language models for long-text tasks with over 160B tokens. In LongWanjuan, we categorize long texts into holistic, aggregated, and chaotic types, enabling a detailed analysis of long-text quality. Furthermore, we devise a data mixture recipe that strategically balances different types of long texts within LongWanjuan, leading to significant improvements in model performance on long-text tasks. The code and dataset are available at https://github.com/OpenLMLab/LongWanjuan. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13583v2-abstract-full').style.display = 'none'; document.getElementById('2402.13583v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Update Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02949">arXiv:2402.02949</a> <span> [<a href="https://arxiv.org/pdf/2402.02949">pdf</a>, <a href="https://arxiv.org/format/2402.02949">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Kernel PCA for Out-of-Distribution Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fang%2C+K">Kun Fang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+Q">Qinghua Tao</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kexin Lv</a>, <a href="/search/cs?searchtype=author&query=He%2C+M">Mingzhen He</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaolin Huang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jie Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02949v2-abstract-short" style="display: inline;"> Out-of-Distribution (OoD) detection is vital for the reliability of Deep Neural Networks (DNNs). Existing works have shown the insufficiency of Principal Component Analysis (PCA) straightforwardly applied on the features of DNNs in detecting OoD data from In-Distribution (InD) data. The failure of PCA suggests that the network features residing in OoD and InD are not well separated by simply proce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02949v2-abstract-full').style.display = 'inline'; document.getElementById('2402.02949v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02949v2-abstract-full" style="display: none;"> Out-of-Distribution (OoD) detection is vital for the reliability of Deep Neural Networks (DNNs). Existing works have shown the insufficiency of Principal Component Analysis (PCA) straightforwardly applied on the features of DNNs in detecting OoD data from In-Distribution (InD) data. The failure of PCA suggests that the network features residing in OoD and InD are not well separated by simply proceeding in a linear subspace, which instead can be resolved through proper non-linear mappings. In this work, we leverage the framework of Kernel PCA (KPCA) for OoD detection, and seek suitable non-linear kernels that advocate the separability between InD and OoD data in the subspace spanned by the principal components. Besides, explicit feature mappings induced from the devoted task-specific kernels are adopted so that the KPCA reconstruction error for new test samples can be efficiently obtained with large-scale data. Extensive theoretical and empirical results on multiple OoD data sets and network structures verify the superiority of our KPCA detector in efficiency and efficacy with state-of-the-art detection performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02949v2-abstract-full').style.display = 'none'; document.getElementById('2402.02949v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08327">arXiv:2401.08327</a> <span> [<a href="https://arxiv.org/pdf/2401.08327">pdf</a>, <a href="https://arxiv.org/format/2401.08327">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learn What You Need in Personalized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kexin Lv</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+R">Rui Ye</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaolin Huang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jie Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Siheng Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08327v1-abstract-short" style="display: inline;"> Personalized federated learning aims to address data heterogeneity across local clients in federated learning. However, current methods blindly incorporate either full model parameters or predefined partial parameters in personalized federated learning. They fail to customize the collaboration manner according to each local client's data characteristics, causing unpleasant aggregation results. To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08327v1-abstract-full').style.display = 'inline'; document.getElementById('2401.08327v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08327v1-abstract-full" style="display: none;"> Personalized federated learning aims to address data heterogeneity across local clients in federated learning. However, current methods blindly incorporate either full model parameters or predefined partial parameters in personalized federated learning. They fail to customize the collaboration manner according to each local client's data characteristics, causing unpleasant aggregation results. To address this essential issue, we propose $\textit{Learn2pFed}$, a novel algorithm-unrolling-based personalized federated learning framework, enabling each client to adaptively select which part of its local model parameters should participate in collaborative training. The key novelty of the proposed $\textit{Learn2pFed}$ is to optimize each local model parameter's degree of participant in collaboration as learnable parameters via algorithm unrolling methods. This approach brings two benefits: 1) mathmatically determining the participation degree of local model parameters in the federated collaboration, and 2) obtaining more stable and improved solutions. Extensive experiments on various tasks, including regression, forecasting, and image classification, demonstrate that $\textit{Learn2pFed}$ significantly outperforms previous personalized federated learning methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08327v1-abstract-full').style.display = 'none'; document.getElementById('2401.08327v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.03327">arXiv:2312.03327</a> <span> [<a href="https://arxiv.org/pdf/2312.03327">pdf</a>, <a href="https://arxiv.org/format/2312.03327">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Building Category Graphs Representation with Spatial and Temporal Attention for Visual Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaobo Hu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Youfang Lin</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">HeHe Fan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhihao Wu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.03327v1-abstract-short" style="display: inline;"> Given an object of interest, visual navigation aims to reach the object's location based on a sequence of partial observations. To this end, an agent needs to 1) learn a piece of certain knowledge about the relations of object categories in the world during training and 2) look for the target object based on the pre-learned object category relations and its moving trajectory in the current unseen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03327v1-abstract-full').style.display = 'inline'; document.getElementById('2312.03327v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.03327v1-abstract-full" style="display: none;"> Given an object of interest, visual navigation aims to reach the object's location based on a sequence of partial observations. To this end, an agent needs to 1) learn a piece of certain knowledge about the relations of object categories in the world during training and 2) look for the target object based on the pre-learned object category relations and its moving trajectory in the current unseen environment. In this paper, we propose a Category Relation Graph (CRG) to learn the knowledge of object category layout relations and a Temporal-Spatial-Region (TSR) attention architecture to perceive the long-term spatial-temporal dependencies of objects helping the navigation. We learn prior knowledge of object layout, establishing a category relationship graph to deduce the positions of specific objects. Subsequently, we introduced TSR to capture the relationships of objects in temporal, spatial, and regions within the observation trajectories. Specifically, we propose a Temporal attention module (T) to model the temporal structure of the observation sequence, which implicitly encodes the historical moving or trajectory information. Then, a Spatial attention module (S) is used to uncover the spatial context of the current observation objects based on the category relation graph and past observations. Last, a Region attention module (R) shifts the attention to the target-relevant region. Based on the visual representation extracted by our method, the agent can better perceive the environment and easily learn superior navigation policy. Experiments on AI2-THOR demonstrate our CRG-TSR method significantly outperforms existing methods regarding both effectiveness and efficiency. The code has been included in the supplementary material and will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03327v1-abstract-full').style.display = 'none'; document.getElementById('2312.03327v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages; 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.01915">arXiv:2312.01915</a> <span> [<a href="https://arxiv.org/pdf/2312.01915">pdf</a>, <a href="https://arxiv.org/format/2312.01915">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Reliable Representation with Bidirectional Transition Model for Visual Reinforcement Learning Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaobo Hu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Youfang Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yue Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinwen Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">Hehe Fan</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.01915v1-abstract-short" style="display: inline;"> Visual reinforcement learning has proven effective in solving control tasks with high-dimensional observations. However, extracting reliable and generalizable representations from vision-based observations remains a central challenge. Inspired by the human thought process, when the representation extracted from the observation can predict the future and trace history, the representation is reliabl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01915v1-abstract-full').style.display = 'inline'; document.getElementById('2312.01915v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.01915v1-abstract-full" style="display: none;"> Visual reinforcement learning has proven effective in solving control tasks with high-dimensional observations. However, extracting reliable and generalizable representations from vision-based observations remains a central challenge. Inspired by the human thought process, when the representation extracted from the observation can predict the future and trace history, the representation is reliable and accurate in comprehending the environment. Based on this concept, we introduce a Bidirectional Transition (BiT) model, which leverages the ability to bidirectionally predict environmental transitions both forward and backward to extract reliable representations. Our model demonstrates competitive generalization performance and sample efficiency on two settings of the DeepMind Control suite. Additionally, we utilize robotic manipulation and CARLA simulators to demonstrate the wide applicability of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01915v1-abstract-full').style.display = 'none'; document.getElementById('2312.01915v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.00407">arXiv:2312.00407</a> <span> [<a href="https://arxiv.org/pdf/2312.00407">pdf</a>, <a href="https://arxiv.org/format/2312.00407">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CoLLiE: Collaborative Training of Large Language Models in an Efficient Way </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuo Zhang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+T">Tianle Gu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+S">Shuhao Xing</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+J">Jiawei Hong</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaoran Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuqing Yang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Honglin Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tengxiao Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yu Sun</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">Hang Yan</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.00407v1-abstract-short" style="display: inline;"> Large language models (LLMs) are increasingly pivotal in a wide range of natural language processing tasks. Access to pre-trained models, courtesy of the open-source community, has made it possible to adapt these models to specific applications for enhanced performance. However, the substantial resources required for training these models necessitate efficient solutions. This paper introduces CoLL… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00407v1-abstract-full').style.display = 'inline'; document.getElementById('2312.00407v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.00407v1-abstract-full" style="display: none;"> Large language models (LLMs) are increasingly pivotal in a wide range of natural language processing tasks. Access to pre-trained models, courtesy of the open-source community, has made it possible to adapt these models to specific applications for enhanced performance. However, the substantial resources required for training these models necessitate efficient solutions. This paper introduces CoLLiE, an efficient library that facilitates collaborative training of large language models using 3D parallelism, parameter-efficient fine-tuning (PEFT) methods, and optimizers such as Lion, Adan, Sophia, LOMO and AdaLomo. With its modular design and comprehensive functionality, CoLLiE offers a balanced blend of efficiency, ease of use, and customization. CoLLiE has proven superior training efficiency in comparison with prevalent solutions in pre-training and fine-tuning scenarios. Furthermore, we provide an empirical evaluation of the correlation between model size and GPU memory consumption under different optimization methods, as well as an analysis of the throughput. Lastly, we carry out a comprehensive comparison of various optimizers and PEFT methods within the instruction-tuning context. CoLLiE is available at https://github.com/OpenLMLab/collie. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00407v1-abstract-full').style.display = 'none'; document.getElementById('2312.00407v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at EMNLP 2023 Demo; Code is available at https://github.com/OpenLMLab/collie</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16133">arXiv:2311.16133</a> <span> [<a href="https://arxiv.org/pdf/2311.16133">pdf</a>, <a href="https://arxiv.org/format/2311.16133">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Effective Quantization for Diffusion Models on CPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chang%2C+H">Hanwen Chang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+H">Haihao Shen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yiyang Cai</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xinyu Ye</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhenzhong Xu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wenhua Cheng</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kaokao Lv</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Weiwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yintong Lu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Heng Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16133v2-abstract-short" style="display: inline;"> Diffusion models have gained popularity for generating images from textual descriptions. Nonetheless, the substantial need for computational resources continues to present a noteworthy challenge, contributing to time-consuming processes. Quantization, a technique employed to compress deep learning models for enhanced efficiency, presents challenges when applied to diffusion models. These models ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16133v2-abstract-full').style.display = 'inline'; document.getElementById('2311.16133v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16133v2-abstract-full" style="display: none;"> Diffusion models have gained popularity for generating images from textual descriptions. Nonetheless, the substantial need for computational resources continues to present a noteworthy challenge, contributing to time-consuming processes. Quantization, a technique employed to compress deep learning models for enhanced efficiency, presents challenges when applied to diffusion models. These models are notably more sensitive to quantization compared to other model types, potentially resulting in a degradation of image quality. In this paper, we introduce a novel approach to quantize the diffusion models by leveraging both quantization-aware training and distillation. Our results show the quantized models can maintain the high image quality while demonstrating the inference efficiency on CPUs. The code is publicly available at: https://github.com/intel/intel-extension-for-transformers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16133v2-abstract-full').style.display = 'none'; document.getElementById('2311.16133v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.10944">arXiv:2310.10944</a> <span> [<a href="https://arxiv.org/pdf/2310.10944">pdf</a>, <a href="https://arxiv.org/format/2310.10944">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TEQ: Trainable Equivalent Transformation for Quantization of LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wenhua Cheng</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yiyang Cai</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kaokao Lv</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+H">Haihao Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.10944v1-abstract-short" style="display: inline;"> As large language models (LLMs) become more prevalent, there is a growing need for new and improved quantization methods that can meet the computationalast layer demands of these modern architectures while maintaining the accuracy. In this paper, we present TEQ, a trainable equivalent transformation that preserves the FP32 precision of the model output while taking advantage of low-precision quant… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10944v1-abstract-full').style.display = 'inline'; document.getElementById('2310.10944v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.10944v1-abstract-full" style="display: none;"> As large language models (LLMs) become more prevalent, there is a growing need for new and improved quantization methods that can meet the computationalast layer demands of these modern architectures while maintaining the accuracy. In this paper, we present TEQ, a trainable equivalent transformation that preserves the FP32 precision of the model output while taking advantage of low-precision quantization, especially 3 and 4 bits weight-only quantization. The training process is lightweight, requiring only 1K steps and fewer than 0.1 percent of the original model's trainable parameters. Furthermore, the transformation does not add any computational overhead during inference. Our results are on-par with the state-of-the-art (SOTA) methods on typical LLMs. Our approach can be combined with other methods to achieve even better performance. The code is available at https://github.com/intel/neural-compressor. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10944v1-abstract-full').style.display = 'none'; document.getElementById('2310.10944v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.10195">arXiv:2310.10195</a> <span> [<a href="https://arxiv.org/pdf/2310.10195">pdf</a>, <a href="https://arxiv.org/format/2310.10195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> AdaLomo: Low-memory Optimization with Adaptive Learning Rate </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">Hang Yan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+H">Haijun Lv</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.10195v3-abstract-short" style="display: inline;"> Large language models have achieved remarkable success, but their extensive parameter size necessitates substantial memory for training, thereby setting a high threshold. While the recently proposed low-memory optimization (LOMO) reduces memory footprint, its optimization technique, akin to stochastic gradient descent, is sensitive to hyper-parameters and exhibits suboptimal convergence, failing t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10195v3-abstract-full').style.display = 'inline'; document.getElementById('2310.10195v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.10195v3-abstract-full" style="display: none;"> Large language models have achieved remarkable success, but their extensive parameter size necessitates substantial memory for training, thereby setting a high threshold. While the recently proposed low-memory optimization (LOMO) reduces memory footprint, its optimization technique, akin to stochastic gradient descent, is sensitive to hyper-parameters and exhibits suboptimal convergence, failing to match the performance of the prevailing optimizer for large language models, AdamW. Through empirical analysis of the Adam optimizer, we found that, compared to momentum, the adaptive learning rate is more critical for bridging the gap. Building on this insight, we introduce the low-memory optimization with adaptive learning rate (AdaLomo), which offers an adaptive learning rate for each parameter. To maintain memory efficiency, we employ non-negative matrix factorization for the second-order moment estimation in the optimizer state. Additionally, we suggest the use of a grouped update normalization to stabilize convergence. Our experiments with instruction-tuning and further pre-training demonstrate that AdaLomo achieves results on par with AdamW, while significantly reducing memory requirements, thereby lowering the hardware barrier to training large language models. The code is accessible at https://github.com/OpenLMLab/LOMO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10195v3-abstract-full').style.display = 'none'; document.getElementById('2310.10195v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024 camera ready version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09899">arXiv:2310.09899</a> <span> [<a href="https://arxiv.org/pdf/2310.09899">pdf</a>, <a href="https://arxiv.org/format/2310.09899">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Generalizable whole-body global manipulation of deformable linear objects by dual-arm robot in 3-D constrained environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+M">Mingrui Yu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Changhao Wang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yongpeng Jiang</a>, <a href="/search/cs?searchtype=author&query=Tomizuka%2C+M">Masayoshi Tomizuka</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09899v2-abstract-short" style="display: inline;"> Constrained environments are common in practical applications of manipulating deformable linear objects (DLOs), where movements of both DLOs and robots should be constrained. This task is high-dimensional and highly constrained owing to the highly deformable DLOs, dual-arm robots with high degrees of freedom, and 3-D complex environments, which render global planning challenging. Furthermore, accu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09899v2-abstract-full').style.display = 'inline'; document.getElementById('2310.09899v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09899v2-abstract-full" style="display: none;"> Constrained environments are common in practical applications of manipulating deformable linear objects (DLOs), where movements of both DLOs and robots should be constrained. This task is high-dimensional and highly constrained owing to the highly deformable DLOs, dual-arm robots with high degrees of freedom, and 3-D complex environments, which render global planning challenging. Furthermore, accurate DLO models needed by planning are often unavailable owing to their strong nonlinearity and diversity, resulting in unreliable planned paths. This article focuses on the global moving and shaping of DLOs in constrained environments by dual-arm robots. The main objectives are 1) to efficiently and accurately accomplish this task, and 2) to achieve generalizable and robust manipulation of various DLOs. To this end, we propose a complementary framework with whole-body planning and control using appropriate DLO model representations. First, a global planner is proposed to efficiently find feasible solutions based on a simplified DLO energy model, which considers the full system states and all constraints to plan more reliable paths. Then, a closed-loop manipulation scheme is proposed to compensate for the modeling errors and enhance the robustness and accuracy, which incorporates a model predictive controller that real-time adjusts the robot motion based on an adaptive DLO motion model. The key novelty is that our framework can efficiently solve the high-dimensional problem subject to multiple constraints and generalize to various DLOs without elaborate model identifications. Experiments demonstrate that our framework can accomplish considerably more complicated tasks than existing works, with significantly higher efficiency, generalizability, and reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09899v2-abstract-full').style.display = 'none'; document.getElementById('2310.09899v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IJRR. Project website: https://mingrui-yu.github.io/DLO_planning_2</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.05516">arXiv:2309.05516</a> <span> [<a href="https://arxiv.org/pdf/2309.05516">pdf</a>, <a href="https://arxiv.org/format/2309.05516">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wenhua Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Weiwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+H">Haihao Shen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yiyang Cai</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xin He</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kaokao Lv</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.05516v5-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated exceptional proficiency in language-related tasks, but their deployment poses significant challenges due to substantial memory and storage requirements. Weight-only quantization has emerged as a promising solution, significantly reducing memory and storage needs without sacrificing too much performance. In this study, we introduce SignRound, a method… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.05516v5-abstract-full').style.display = 'inline'; document.getElementById('2309.05516v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.05516v5-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated exceptional proficiency in language-related tasks, but their deployment poses significant challenges due to substantial memory and storage requirements. Weight-only quantization has emerged as a promising solution, significantly reducing memory and storage needs without sacrificing too much performance. In this study, we introduce SignRound, a method that leverages signed gradient descent (SignSGD) to optimize rounding values and weight clipping in just 200 steps. SignRound integrates the advantages of Quantization-Aware Training (QAT) and Post-Training Quantization (PTQ), delivering exceptional results across 2 to 4 bits while minimizing tuning costs and avoiding additional inference overhead. For example, SignRound achieved absolute average accuracy improvements ranging from 6.91% to 33.22% at 2bits, as measured by the average zero-shot accuracy across 11 tasks. It also demonstrates strong generalization in recent models, achieving near-lossless 4-bit quantization in most scenarios. The source code is publicly available at https://github.com/intel/auto-round. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.05516v5-abstract-full').style.display = 'none'; document.getElementById('2309.05516v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP24 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.09782">arXiv:2306.09782</a> <span> [<a href="https://arxiv.org/pdf/2306.09782">pdf</a>, <a href="https://arxiv.org/format/2306.09782">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Full Parameter Fine-tuning for Large Language Models with Limited Resources </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuqing Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tengxiao Liu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Q">Qinghui Gao</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.09782v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have revolutionized Natural Language Processing (NLP) but demand massive GPU resources for training. Lowering the threshold for LLMs training would encourage greater participation from researchers, benefiting both academia and society. While existing approaches have focused on parameter-efficient fine-tuning, which tunes or adds a small number of parameters, few have a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.09782v2-abstract-full').style.display = 'inline'; document.getElementById('2306.09782v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.09782v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have revolutionized Natural Language Processing (NLP) but demand massive GPU resources for training. Lowering the threshold for LLMs training would encourage greater participation from researchers, benefiting both academia and society. While existing approaches have focused on parameter-efficient fine-tuning, which tunes or adds a small number of parameters, few have addressed the challenge of tuning the full parameters of LLMs with limited resources. In this work, we propose a new optimizer, LOw-Memory Optimization (LOMO), which fuses the gradient computation and the parameter update in one step to reduce memory usage. By integrating LOMO with existing memory saving techniques, we reduce memory usage to 10.8% compared to the standard approach (DeepSpeed solution). Consequently, our approach enables the full parameter fine-tuning of a 65B model on a single machine with 8 RTX 3090, each with 24GB memory.Code and data are available at https://github.com/OpenLMLab/LOMO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.09782v2-abstract-full').style.display = 'none'; document.getElementById('2306.09782v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.04320">arXiv:2305.04320</a> <span> [<a href="https://arxiv.org/pdf/2305.04320">pdf</a>, <a href="https://arxiv.org/format/2305.04320">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Unified Demonstration Retriever for In-Context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaonan Li</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">Hang Yan</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+T">Tianyang Lin</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+W">Wei Zhu</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+Y">Yuan Ni</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+G">Guotong Xie</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoling Wang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.04320v2-abstract-short" style="display: inline;"> In-context learning is a new learning paradigm where a language model conditions on a few input-output pairs (demonstrations) and a test input, and directly outputs the prediction. It has been shown highly dependent on the provided demonstrations and thus promotes the research of demonstration retrieval: given a test input, relevant examples are retrieved from the training set to serve as informat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.04320v2-abstract-full').style.display = 'inline'; document.getElementById('2305.04320v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.04320v2-abstract-full" style="display: none;"> In-context learning is a new learning paradigm where a language model conditions on a few input-output pairs (demonstrations) and a test input, and directly outputs the prediction. It has been shown highly dependent on the provided demonstrations and thus promotes the research of demonstration retrieval: given a test input, relevant examples are retrieved from the training set to serve as informative demonstrations for in-context learning. While previous works focus on training task-specific retrievers for several tasks separately, these methods are often hard to transfer and scale on various tasks, and separately trained retrievers incur a lot of parameter storage and deployment cost. In this paper, we propose Unified Demonstration Retriever (\textbf{UDR}), a single model to retrieve demonstrations for a wide range of tasks. To train UDR, we cast various tasks' training signals into a unified list-wise ranking formulation by language model's feedback. Then we propose a multi-task list-wise ranking training framework, with an iterative mining strategy to find high-quality candidates, which can help UDR fully incorporate various tasks' signals. Experiments on 30+ tasks across 13 task families and multiple data domains show that UDR significantly outperforms baselines. Further analyses show the effectiveness of each proposed component and UDR's strong ability in various scenarios including different LMs (1.3B - 175B), unseen datasets, varying demonstration quantities, etc. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.04320v2-abstract-full').style.display = 'none'; document.getElementById('2305.04320v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2023 camera ready version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.01070">arXiv:2303.01070</a> <span> [<a href="https://arxiv.org/pdf/2303.01070">pdf</a>, <a href="https://arxiv.org/format/2303.01070">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s40747-024-01415-1">10.1007/s40747-024-01415-1 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GHQ: Grouped Hybrid Q Learning for Heterogeneous Cooperative Multi-agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaoyang Yu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Youfang Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiangsen Wang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+S">Sheng Han</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.01070v2-abstract-short" style="display: inline;"> Previous deep multi-agent reinforcement learning (MARL) algorithms have achieved impressive results, typically in homogeneous scenarios. However, heterogeneous scenarios are also very common and usually harder to solve. In this paper, we mainly discuss cooperative heterogeneous MARL problems in Starcraft Multi-Agent Challenges (SMAC) environment. We firstly define and describe the heterogeneous pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01070v2-abstract-full').style.display = 'inline'; document.getElementById('2303.01070v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.01070v2-abstract-full" style="display: none;"> Previous deep multi-agent reinforcement learning (MARL) algorithms have achieved impressive results, typically in homogeneous scenarios. However, heterogeneous scenarios are also very common and usually harder to solve. In this paper, we mainly discuss cooperative heterogeneous MARL problems in Starcraft Multi-Agent Challenges (SMAC) environment. We firstly define and describe the heterogeneous problems in SMAC. In order to comprehensively reveal and study the problem, we make new maps added to the original SMAC maps. We find that baseline algorithms fail to perform well in those heterogeneous maps. To address this issue, we propose the Grouped Individual-Global-Max Consistency (GIGM) and a novel MARL algorithm, Grouped Hybrid Q Learning (GHQ). GHQ separates agents into several groups and keeps individual parameters for each group, along with a novel hybrid structure for factorization. To enhance coordination between groups, we maximize the Inter-group Mutual Information (IGMI) between groups' trajectories. Experiments on original and new heterogeneous maps show the fabulous performance of GHQ compared to other state-of-the-art algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01070v2-abstract-full').style.display = 'none'; document.getElementById('2303.01070v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.01433">arXiv:2210.01433</a> <span> [<a href="https://arxiv.org/pdf/2210.01433">pdf</a>, <a href="https://arxiv.org/format/2210.01433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Learning to Estimate 3-D States of Deformable Linear Objects from Single-Frame Occluded Point Clouds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+M">Mingrui Yu</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+Y">Yifan Pu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xin Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+G">Gao Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.01433v2-abstract-short" style="display: inline;"> Accurately and robustly estimating the state of deformable linear objects (DLOs), such as ropes and wires, is crucial for DLO manipulation and other applications. However, it remains a challenging open issue due to the high dimensionality of the state space, frequent occlusions, and noises. This paper focuses on learning to robustly estimate the states of DLOs from single-frame point clouds in the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01433v2-abstract-full').style.display = 'inline'; document.getElementById('2210.01433v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.01433v2-abstract-full" style="display: none;"> Accurately and robustly estimating the state of deformable linear objects (DLOs), such as ropes and wires, is crucial for DLO manipulation and other applications. However, it remains a challenging open issue due to the high dimensionality of the state space, frequent occlusions, and noises. This paper focuses on learning to robustly estimate the states of DLOs from single-frame point clouds in the presence of occlusions using a data-driven method. We propose a novel two-branch network architecture to exploit global and local information of input point cloud respectively and design a fusion module to effectively leverage the advantages of both methods. Simulation and real-world experimental results demonstrate that our method can generate globally smooth and locally precise DLO state estimation results even with heavily occluded point clouds, which can be directly applied to real-world robotic manipulation of DLOs in 3-D space. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01433v2-abstract-full').style.display = 'none'; document.getElementById('2210.01433v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by International Conference on Robotics and Automation (ICRA) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.11145">arXiv:2209.11145</a> <span> [<a href="https://arxiv.org/pdf/2209.11145">pdf</a>, <a href="https://arxiv.org/format/2209.11145">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> A Coarse-to-Fine Framework for Dual-Arm Manipulation of Deformable Linear Objects with Whole-Body Obstacle Avoidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+M">Mingrui Yu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Changhao Wang</a>, <a href="/search/cs?searchtype=author&query=Tomizuka%2C+M">Masayoshi Tomizuka</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.11145v3-abstract-short" style="display: inline;"> Manipulating deformable linear objects (DLOs) to achieve desired shapes in constrained environments with obstacles is a meaningful but challenging task. Global planning is necessary for such a highly-constrained task; however, accurate models of DLOs required by planners are difficult to obtain owing to their deformable nature, and the inevitable modeling errors significantly affect the planning r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.11145v3-abstract-full').style.display = 'inline'; document.getElementById('2209.11145v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.11145v3-abstract-full" style="display: none;"> Manipulating deformable linear objects (DLOs) to achieve desired shapes in constrained environments with obstacles is a meaningful but challenging task. Global planning is necessary for such a highly-constrained task; however, accurate models of DLOs required by planners are difficult to obtain owing to their deformable nature, and the inevitable modeling errors significantly affect the planning results, probably resulting in task failure if the robot simply executes the planned path in an open-loop manner. In this paper, we propose a coarse-to-fine framework to combine global planning and local control for dual-arm manipulation of DLOs, capable of precisely achieving desired configurations and avoiding potential collisions between the DLO, robot, and obstacles. Specifically, the global planner refers to a simple yet effective DLO energy model and computes a coarse path to find a feasible solution efficiently; then the local controller follows that path as guidance and further shapes it with closed-loop feedback to compensate for the planning errors and improve the task accuracy. Both simulations and real-world experiments demonstrate that our framework can robustly achieve desired DLO configurations in constrained environments with imprecise DLO models, which may not be reliably achieved by only planning or control. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.11145v3-abstract-full').style.display = 'none'; document.getElementById('2209.11145v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICRA 2023. Project website: https://mingrui-yu.github.io/DLO_planning/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.14690">arXiv:2205.14690</a> <span> [<a href="https://arxiv.org/pdf/2205.14690">pdf</a>, <a href="https://arxiv.org/format/2205.14690">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CoNT: Contrastive Neural Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=An%2C+C">Chenxin An</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+J">Jiangtao Feng</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+L">Lingpeng Kong</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xuanjing Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.14690v4-abstract-short" style="display: inline;"> Recently, contrastive learning attracts increasing interests in neural text generation as a new solution to alleviate the exposure bias problem. It introduces a sequence-level training signal which is crucial to generation tasks that always rely on auto-regressive decoding. However, previous methods using contrastive learning in neural text generation usually lead to inferior performance. In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.14690v4-abstract-full').style.display = 'inline'; document.getElementById('2205.14690v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.14690v4-abstract-full" style="display: none;"> Recently, contrastive learning attracts increasing interests in neural text generation as a new solution to alleviate the exposure bias problem. It introduces a sequence-level training signal which is crucial to generation tasks that always rely on auto-regressive decoding. However, previous methods using contrastive learning in neural text generation usually lead to inferior performance. In this paper, we analyse the underlying reasons and propose a new Contrastive Neural Text generation framework, CoNT. CoNT addresses bottlenecks that prevent contrastive learning from being widely adopted in generation tasks from three aspects -- the construction of contrastive examples, the choice of the contrastive loss, and the strategy in decoding. We validate CoNT on five generation tasks with ten benchmarks, including machine translation, summarization, code comment generation, data-to-text generation and commonsense generation. Experimental results show that CoNT clearly outperforms the conventional training framework on all the ten benchmarks with a convincing margin. Especially, CoNT surpasses previous the most competitive contrastive learning method for text generation, by 1.50 BLEU on machine translation and 1.77 ROUGE-1 on summarization, respectively. It achieves new state-of-the-art on summarization, code comment generation (without external data) and data-to-text generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.14690v4-abstract-full').style.display = 'none'; document.getElementById('2205.14690v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.04004">arXiv:2205.04004</a> <span> [<a href="https://arxiv.org/pdf/2205.04004">pdf</a>, <a href="https://arxiv.org/format/2205.04004">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TRO.2022.3200546">10.1109/TRO.2022.3200546 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Global Model Learning for Large Deformation Control of Elastic Deformable Linear Objects: An Efficient and Adaptive Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+M">Mingrui Yu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+H">Hanzhong Zhong</a>, <a href="/search/cs?searchtype=author&query=Song%2C+S">Shiji Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.04004v2-abstract-short" style="display: inline;"> Robotic manipulation of deformable linear objects (DLOs) has broad application prospects in many fields. However, a key issue is to obtain the exact deformation models (i.e., how robot motion affects DLO deformation), which are hard to theoretically calculate and vary among different DLOs. Thus, shape control of DLOs is challenging, especially for large deformation control which requires global an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.04004v2-abstract-full').style.display = 'inline'; document.getElementById('2205.04004v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.04004v2-abstract-full" style="display: none;"> Robotic manipulation of deformable linear objects (DLOs) has broad application prospects in many fields. However, a key issue is to obtain the exact deformation models (i.e., how robot motion affects DLO deformation), which are hard to theoretically calculate and vary among different DLOs. Thus, shape control of DLOs is challenging, especially for large deformation control which requires global and more accurate models. In this paper, we propose a coupled offline and online data-driven method for efficiently learning a global deformation model, allowing for both accurate modeling through offline learning and further updating for new DLOs via online adaptation. Specifically, the model approximated by a neural network is first trained offline on random data, then seamlessly migrated to the online phase, and further updated online during actual manipulation. Several strategies are introduced to improve the model's efficiency and generalization ability. We propose a convex-optimization-based controller, and analyze the system's stability using the Lyapunov method. Detailed simulations and real-world experiments demonstrate that our method can efficiently and precisely estimate the deformation model, and achieve large deformation control of untrained DLOs in 2D and 3D dual-arm manipulation tasks better than the existing methods. It accomplishes all 24 tasks with different desired shapes on different DLOs in the real world, using only simulation data for the offline learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.04004v2-abstract-full').style.display = 'none'; document.getElementById('2205.04004v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Robotics. Project website: https://mingrui-yu.github.io/shape_control_DLO_2 . Journal version of arXiv:2109.11091</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.09213">arXiv:2201.09213</a> <span> [<a href="https://arxiv.org/pdf/2201.09213">pdf</a>, <a href="https://arxiv.org/ps/2201.09213">ps</a>, <a href="https://arxiv.org/format/2201.09213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FN-Net:Remove the Outliers by Filtering the Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.09213v1-abstract-short" style="display: inline;"> Establishing the correspondence between two images is an important research direction of computer vision. When estimating the relationship between two images, it is often disturbed by outliers. In this paper, we propose a convolutional neural network that can filter the noise of outliers. It can output the probability that the pair of feature points is an inlier and regress the essential matrix re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.09213v1-abstract-full').style.display = 'inline'; document.getElementById('2201.09213v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.09213v1-abstract-full" style="display: none;"> Establishing the correspondence between two images is an important research direction of computer vision. When estimating the relationship between two images, it is often disturbed by outliers. In this paper, we propose a convolutional neural network that can filter the noise of outliers. It can output the probability that the pair of feature points is an inlier and regress the essential matrix representing the relative pose of the camera. The outliers are mainly caused by the noise introduced by the previous processing. The outliers rejection can be treated as a problem of noise elimination, and the soft threshold function has a very good effect on noise reduction. Therefore, we designed an adaptive denoising module based on soft threshold function to remove noise components in the outliers, to reduce the probability that the outlier is predicted to be an inlier. Experimental results on the YFCC100M dataset show that our method exceeds the state-of-the-art in relative pose estimation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.09213v1-abstract-full').style.display = 'none'; document.getElementById('2201.09213v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T06 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.07286">arXiv:2201.07286</a> <span> [<a href="https://arxiv.org/pdf/2201.07286">pdf</a>, <a href="https://arxiv.org/format/2201.07286">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Conservative Distributional Reinforcement Learning with Safety Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hengrui Zhang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Youfang Lin</a>, <a href="/search/cs?searchtype=author&query=Han%2C+S">Sheng Han</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.07286v2-abstract-short" style="display: inline;"> Safety exploration can be regarded as a constrained Markov decision problem where the expected long-term cost is constrained. Previous off-policy algorithms convert the constrained optimization problem into the corresponding unconstrained dual problem by introducing the Lagrangian relaxation technique. However, the cost function of the above algorithms provides inaccurate estimations and causes th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.07286v2-abstract-full').style.display = 'inline'; document.getElementById('2201.07286v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.07286v2-abstract-full" style="display: none;"> Safety exploration can be regarded as a constrained Markov decision problem where the expected long-term cost is constrained. Previous off-policy algorithms convert the constrained optimization problem into the corresponding unconstrained dual problem by introducing the Lagrangian relaxation technique. However, the cost function of the above algorithms provides inaccurate estimations and causes the instability of the Lagrange multiplier learning. In this paper, we present a novel off-policy reinforcement learning algorithm called Conservative Distributional Maximum a Posteriori Policy Optimization (CDMPO). At first, to accurately judge whether the current situation satisfies the constraints, CDMPO adapts distributional reinforcement learning method to estimate the Q-function and C-function. Then, CDMPO uses a conservative value function loss to reduce the number of violations of constraints during the exploration process. In addition, we utilize Weighted Average Proportional Integral Derivative (WAPID) to update the Lagrange multiplier stably. Empirical results show that the proposed method has fewer violations of constraints in the early exploration process. The final test results also illustrate that our method has better risk control. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.07286v2-abstract-full').style.display = 'none'; document.getElementById('2201.07286v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.03014">arXiv:2201.03014</a> <span> [<a href="https://arxiv.org/pdf/2201.03014">pdf</a>, <a href="https://arxiv.org/format/2201.03014">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Glance and Focus Networks for Dynamic Visual Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+G">Gao Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yulin Wang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Haojun Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wenhui Huang</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+P">Pengfei Qi</a>, <a href="/search/cs?searchtype=author&query=Song%2C+S">Shiji Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.03014v2-abstract-short" style="display: inline;"> Spatial redundancy widely exists in visual recognition tasks, i.e., discriminative features in an image or video frame usually correspond to only a subset of pixels, while the remaining regions are irrelevant to the task at hand. Therefore, static models which process all the pixels with an equal amount of computation result in considerable redundancy in terms of time and space consumption. In thi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.03014v2-abstract-full').style.display = 'inline'; document.getElementById('2201.03014v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.03014v2-abstract-full" style="display: none;"> Spatial redundancy widely exists in visual recognition tasks, i.e., discriminative features in an image or video frame usually correspond to only a subset of pixels, while the remaining regions are irrelevant to the task at hand. Therefore, static models which process all the pixels with an equal amount of computation result in considerable redundancy in terms of time and space consumption. In this paper, we formulate the image recognition problem as a sequential coarse-to-fine feature learning process, mimicking the human visual system. Specifically, the proposed Glance and Focus Network (GFNet) first extracts a quick global representation of the input image at a low resolution scale, and then strategically attends to a series of salient (small) regions to learn finer features. The sequential process naturally facilitates adaptive inference at test time, as it can be terminated once the model is sufficiently confident about its prediction, avoiding further redundant computation. It is worth noting that the problem of locating discriminant regions in our model is formulated as a reinforcement learning task, thus requiring no additional manual annotations other than classification labels. GFNet is general and flexible as it is compatible with any off-the-shelf backbone models (such as MobileNets, EfficientNets and TSM), which can be conveniently deployed as the feature extractor. Extensive experiments on a variety of image classification and video recognition tasks and with various backbone models demonstrate the remarkable efficiency of our method. For example, it reduces the average latency of the highly efficient MobileNet-V3 on an iPhone XS Max by 1.3x without sacrificing accuracy. Code and pre-trained models are available at https://github.com/blackfeather-wang/GFNet-Pytorch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.03014v2-abstract-full').style.display = 'none'; document.getElementById('2201.03014v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI). Journal version of arXiv:2010.05300 (NeurIPS 2020). The first two authors contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.14422">arXiv:2111.14422</a> <span> [<a href="https://arxiv.org/pdf/2111.14422">pdf</a>, <a href="https://arxiv.org/format/2111.14422">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Agent-Centric Relation Graph for Object Visual Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaobo Hu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Youfang Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhihao Wu</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.14422v3-abstract-short" style="display: inline;"> Object visual navigation aims to steer an agent toward a target object based on visual observations. It is highly desirable to reasonably perceive the environment and accurately control the agent. In the navigation task, we introduce an Agent-Centric Relation Graph (ACRG) for learning the visual representation based on the relationships in the environment. ACRG is a highly effective structure that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.14422v3-abstract-full').style.display = 'inline'; document.getElementById('2111.14422v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.14422v3-abstract-full" style="display: none;"> Object visual navigation aims to steer an agent toward a target object based on visual observations. It is highly desirable to reasonably perceive the environment and accurately control the agent. In the navigation task, we introduce an Agent-Centric Relation Graph (ACRG) for learning the visual representation based on the relationships in the environment. ACRG is a highly effective structure that consists of two relationships, i.e., the horizontal relationship among objects and the distance relationship between the agent and objects . On the one hand, we design the Object Horizontal Relationship Graph (OHRG) that stores the relative horizontal location among objects. On the other hand, we propose the Agent-Target Distance Relationship Graph (ATDRG) that enables the agent to perceive the distance between the target and objects. For ATDRG, we utilize image depth to obtain the target distance and imply the vertical location to capture the distance relationship among objects in the vertical direction. With the above graphs, the agent can perceive the environment and output navigation actions. Experimental results in the artificial environment AI2-THOR demonstrate that ACRG significantly outperforms other state-of-the-art methods in unseen testing environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.14422v3-abstract-full').style.display = 'none'; document.getElementById('2111.14422v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 13 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.00632">arXiv:2108.00632</a> <span> [<a href="https://arxiv.org/pdf/2108.00632">pdf</a>, <a href="https://arxiv.org/format/2108.00632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3514221.3526171">10.1145/3514221.3526171 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Skeena: Efficient and Consistent Cross-Engine Transactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianqiu Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaisong Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianzheng Wang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">King Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.00632v5-abstract-short" style="display: inline;"> Database systems are becoming increasingly multi-engine. In particular, a main-memory database engine may coexist with a traditional storage-centric engine in a system to support various applications. It is desirable to allow applications to access data in both engines using cross-engine transactions. But existing systems are either only designed for single-engine accesses, or impose many restrict… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.00632v5-abstract-full').style.display = 'inline'; document.getElementById('2108.00632v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.00632v5-abstract-full" style="display: none;"> Database systems are becoming increasingly multi-engine. In particular, a main-memory database engine may coexist with a traditional storage-centric engine in a system to support various applications. It is desirable to allow applications to access data in both engines using cross-engine transactions. But existing systems are either only designed for single-engine accesses, or impose many restrictions by limiting cross-engine transactions to certain isolation levels and table operations. The result is inadequate cross-engine support in terms of correctness, performance and programmability. This paper describes Skeena, a holistic approach to cross-engine transactions. We propose a lightweight snapshot tracking structure and an atomic commit protocol to efficiently ensure correctness and support various isolation levels. Evaluation results show that Skeena maintains high performance for single-engine transactions and enables cross-engine transactions which can improve throughput by up to 30x by judiciously placing tables in different engines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.00632v5-abstract-full').style.display = 'none'; document.getElementById('2108.00632v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at SIGMOD 2022</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 2022 International Conference on Management of Data </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.00625">arXiv:2105.00625</a> <span> [<a href="https://arxiv.org/pdf/2105.00625">pdf</a>, <a href="https://arxiv.org/ps/2105.00625">ps</a>, <a href="https://arxiv.org/format/2105.00625">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Three-Party Integer Comparison and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jie Ma</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+B">Bin Qi</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kewei Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.00625v1-abstract-short" style="display: inline;"> Secure integer comparison has been a popular research topic in cryptography, both for its simplicity to describe and for its applications. The aim is to enable two parties to compare their inputs without revealing the exact value of those inputs. In this paper, we highlight three-party integer comparison (TPIC), where a \emph{judge}, with no private input, wants to know the comparison result, wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.00625v1-abstract-full').style.display = 'inline'; document.getElementById('2105.00625v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.00625v1-abstract-full" style="display: none;"> Secure integer comparison has been a popular research topic in cryptography, both for its simplicity to describe and for its applications. The aim is to enable two parties to compare their inputs without revealing the exact value of those inputs. In this paper, we highlight three-party integer comparison (TPIC), where a \emph{judge}, with no private input, wants to know the comparison result, while two \emph{competitors} hold secret integers to do privacy-preserving comparison. The judge actively obtains the result rather than passively waiting for it sent by a competitor. We give two TPIC constructions considering \emph{Mixed adversaries}, who have with different capabilities. One is secure against a semi-honest adversary with low computation and communication cost, while the other is secure against a malicious adversary. Basing on TPIC, we present multi-party comparisons through concrete applications, including a joint bidding scheme and a practical auction. Brief security proofs and analysis for the applications are presented. In comparison, our auction scheme is more efficient with lower cost, making it feasible in practice rather than a theoretical design. All the comparisons and application schemes run on top of blockchain requiring a constant number of rounds. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.00625v1-abstract-full').style.display = 'none'; document.getElementById('2105.00625v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.01780">arXiv:2103.01780</a> <span> [<a href="https://arxiv.org/pdf/2103.01780">pdf</a>, <a href="https://arxiv.org/ps/2103.01780">ps</a>, <a href="https://arxiv.org/format/2103.01780">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A region-based descriptor network for uniformly sampled keypoints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kai Lv</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zongqing Lu</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Q">Qingmin Liao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.01780v1-abstract-short" style="display: inline;"> Matching keypoint pairs of different images is a basic task of computer vision. Most methods require customized extremum point schemes to obtain the coordinates of feature points with high confidence, which often need complex algorithmic design or a network with higher training difficulty and also ignore the possibility that flat regions can be used as candidate regions of matching points. In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.01780v1-abstract-full').style.display = 'inline'; document.getElementById('2103.01780v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.01780v1-abstract-full" style="display: none;"> Matching keypoint pairs of different images is a basic task of computer vision. Most methods require customized extremum point schemes to obtain the coordinates of feature points with high confidence, which often need complex algorithmic design or a network with higher training difficulty and also ignore the possibility that flat regions can be used as candidate regions of matching points. In this paper, we design a region-based descriptor by combining the context features of a deep network. The new descriptor can give a robust representation of a point even in flat regions. By the new descriptor, we can obtain more high confidence matching points without extremum operation. The experimental results show that our proposed method achieves a performance comparable to state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.01780v1-abstract-full').style.display = 'none'; document.getElementById('2103.01780v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T06 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.11625">arXiv:2010.11625</a> <span>  </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> One-shot Distributed Algorithm for Generalized Eigenvalue Problem </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kexin Lv</a>, <a href="/search/cs?searchtype=author&query=He%2C+F">Fan He</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaolin Huang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jie Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liming Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.11625v2-abstract-short" style="display: inline;"> Nowadays, more and more datasets are stored in a distributed way for the sake of memory storage or data privacy. The generalized eigenvalue problem (GEP) plays a vital role in a large family of high-dimensional statistical models. However, the existing distributed method for eigenvalue decomposition cannot be applied in GEP for the divergence of the empirical covariance matrix. Here we propose a g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.11625v2-abstract-full').style.display = 'inline'; document.getElementById('2010.11625v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.11625v2-abstract-full" style="display: none;"> Nowadays, more and more datasets are stored in a distributed way for the sake of memory storage or data privacy. The generalized eigenvalue problem (GEP) plays a vital role in a large family of high-dimensional statistical models. However, the existing distributed method for eigenvalue decomposition cannot be applied in GEP for the divergence of the empirical covariance matrix. Here we propose a general distributed GEP framework with one-shot communication for GEP. If the symmetric data covariance has repeated eigenvalues, e.g., in canonical component analysis, we further modify the method for better convergence. The theoretical analysis on approximation error is conducted and the relation to the divergence of the data covariance, the eigenvalues of the empirical data covariance, and the number of local servers is analyzed. Numerical experiments also show the effectiveness of the proposed algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.11625v2-abstract-full').style.display = 'none'; document.getElementById('2010.11625v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The derivation of the bound in the proof of Theorem 1 contains some errors. And it cannot be resolved at this time.</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.05300">arXiv:2010.05300</a> <span> [<a href="https://arxiv.org/pdf/2010.05300">pdf</a>, <a href="https://arxiv.org/format/2010.05300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Glance and Focus: a Dynamic Approach to Reducing Spatial Redundancy in Image Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yulin Wang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+R">Rui Huang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+S">Shiji Song</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+L">Le Yang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+G">Gao Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.05300v1-abstract-short" style="display: inline;"> The accuracy of deep convolutional neural networks (CNNs) generally improves when fueled with high resolution images. However, this often comes at a high computational cost and high memory footprint. Inspired by the fact that not all regions in an image are task-relevant, we propose a novel framework that performs efficient image classification by processing a sequence of relatively small inputs,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05300v1-abstract-full').style.display = 'inline'; document.getElementById('2010.05300v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.05300v1-abstract-full" style="display: none;"> The accuracy of deep convolutional neural networks (CNNs) generally improves when fueled with high resolution images. However, this often comes at a high computational cost and high memory footprint. Inspired by the fact that not all regions in an image are task-relevant, we propose a novel framework that performs efficient image classification by processing a sequence of relatively small inputs, which are strategically selected from the original image with reinforcement learning. Such a dynamic decision process naturally facilitates adaptive inference at test time, i.e., it can be terminated once the model is sufficiently confident about its prediction and thus avoids further redundant computation. Notably, our framework is general and flexible as it is compatible with most of the state-of-the-art light-weighted CNNs (such as MobileNets, EfficientNets and RegNets), which can be conveniently deployed as the backbone feature extractor. Experiments on ImageNet show that our method consistently improves the computational efficiency of a wide variety of deep models. For example, it further reduces the average latency of the highly efficient MobileNet-V3 on an iPhone XS Max by 20% without sacrificing accuracy. Code and pre-trained models are available at https://github.com/blackfeather-wang/GFNet-Pytorch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05300v1-abstract-full').style.display = 'none'; document.getElementById('2010.05300v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.02664">arXiv:2005.02664</a> <span> [<a href="https://arxiv.org/pdf/2005.02664">pdf</a>, <a href="https://arxiv.org/ps/2005.02664">ps</a>, <a href="https://arxiv.org/format/2005.02664">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LSP.2021.3095017">10.1109/LSP.2021.3095017 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> One-shot Distibuted Algorithm for PCA with RBF Kernels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+F">Fan He</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kexin Lv</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jie Yang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaolin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.02664v3-abstract-short" style="display: inline;"> This letter proposes a one-shot algorithm for feature-distributed kernel PCA. Our algorithm is inspired by the dual relationship between sample-distributed and feature-distributed scenario. This interesting relationship makes it possible to establish distributed kernel PCA for feature-distributed cases from ideas in distributed PCA in sample-distributed scenario. In theoretical part, we analyze th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.02664v3-abstract-full').style.display = 'inline'; document.getElementById('2005.02664v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.02664v3-abstract-full" style="display: none;"> This letter proposes a one-shot algorithm for feature-distributed kernel PCA. Our algorithm is inspired by the dual relationship between sample-distributed and feature-distributed scenario. This interesting relationship makes it possible to establish distributed kernel PCA for feature-distributed cases from ideas in distributed PCA in sample-distributed scenario. In theoretical part, we analyze the approximation error for both linear and RBF kernels. The result suggests that when eigenvalues decay fast, the proposed algorithm gives high quality results with low communication cost. This result is also verified by numerical experiments, showing the effectiveness of our algorithm in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.02664v3-abstract-full').style.display = 'none'; document.getElementById('2005.02664v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.10981">arXiv:2004.10981</a> <span> [<a href="https://arxiv.org/pdf/2004.10981">pdf</a>, <a href="https://arxiv.org/ps/2004.10981">ps</a>, <a href="https://arxiv.org/format/2004.10981">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sparse Generalized Canonical Correlation Analysis: Distributed Alternating Iteration based Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+J">Jia Cai</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kexin Lv</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+J">Junyi Huo</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaolin Huang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jie Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.10981v1-abstract-short" style="display: inline;"> Sparse canonical correlation analysis (CCA) is a useful statistical tool to detect latent information with sparse structures. However, sparse CCA works only for two datasets, i.e., there are only two views or two distinct objects. To overcome this limitation, in this paper, we propose a sparse generalized canonical correlation analysis (GCCA), which could detect the latent relations of multiview d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.10981v1-abstract-full').style.display = 'inline'; document.getElementById('2004.10981v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.10981v1-abstract-full" style="display: none;"> Sparse canonical correlation analysis (CCA) is a useful statistical tool to detect latent information with sparse structures. However, sparse CCA works only for two datasets, i.e., there are only two views or two distinct objects. To overcome this limitation, in this paper, we propose a sparse generalized canonical correlation analysis (GCCA), which could detect the latent relations of multiview data with sparse structures. Moreover, the introduced sparsity could be considered as Laplace prior on the canonical variates. Specifically, we convert the GCCA into a linear system of equations and impose $\ell_1$ minimization penalty for sparsity pursuit. This results in a nonconvex problem on Stiefel manifold, which is difficult to solve. Motivated by Boyd's consensus problem, an algorithm based on distributed alternating iteration approach is developed and theoretical consistency analysis is investigated elaborately under mild conditions. Experiments on several synthetic and real world datasets demonstrate the effectiveness of the proposed algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.10981v1-abstract-full').style.display = 'none'; document.getElementById('2004.10981v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.10375">arXiv:2004.10375</a> <span> [<a href="https://arxiv.org/pdf/2004.10375">pdf</a>, <a href="https://arxiv.org/format/2004.10375">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Graph-based Kinship Reasoning Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+W">Wanhua Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingqiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kangchen Lv</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiwen Lu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+J">Jianjiang Feng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jie Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.10375v1-abstract-short" style="display: inline;"> In this paper, we propose a graph-based kinship reasoning (GKR) network for kinship verification, which aims to effectively perform relational reasoning on the extracted features of an image pair. Unlike most existing methods which mainly focus on how to learn discriminative features, our method considers how to compare and fuse the extracted feature pair to reason about the kin relations. The pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.10375v1-abstract-full').style.display = 'inline'; document.getElementById('2004.10375v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.10375v1-abstract-full" style="display: none;"> In this paper, we propose a graph-based kinship reasoning (GKR) network for kinship verification, which aims to effectively perform relational reasoning on the extracted features of an image pair. Unlike most existing methods which mainly focus on how to learn discriminative features, our method considers how to compare and fuse the extracted feature pair to reason about the kin relations. The proposed GKR constructs a star graph called kinship relational graph where each peripheral node represents the information comparison in one feature dimension and the central node is used as a bridge for information communication among peripheral nodes. Then the GKR performs relational reasoning on this graph with recursive message passing. Extensive experimental results on the KinFaceW-I and KinFaceW-II datasets show that the proposed GKR outperforms the state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.10375v1-abstract-full').style.display = 'none'; document.getElementById('2004.10375v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICME 2020(IEEE International Conference on Multimedia & Expo 2020) as an Oral Presentation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1703.03633">arXiv:1703.03633</a> <span> [<a href="https://arxiv.org/pdf/1703.03633">pdf</a>, <a href="https://arxiv.org/format/1703.03633">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning Gradient Descent: Better Generalization and Longer Horizons </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lv%2C+K">Kaifeng Lv</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+S">Shunhua Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.03633v3-abstract-short" style="display: inline;"> Training deep neural networks is a highly nontrivial task, involving carefully selecting appropriate training algorithms, scheduling step sizes and tuning other hyperparameters. Trying different combinations can be quite labor-intensive and time consuming. Recently, researchers have tried to use deep learning algorithms to exploit the landscape of the loss function of the training problem of inter… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.03633v3-abstract-full').style.display = 'inline'; document.getElementById('1703.03633v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.03633v3-abstract-full" style="display: none;"> Training deep neural networks is a highly nontrivial task, involving carefully selecting appropriate training algorithms, scheduling step sizes and tuning other hyperparameters. Trying different combinations can be quite labor-intensive and time consuming. Recently, researchers have tried to use deep learning algorithms to exploit the landscape of the loss function of the training problem of interest, and learn how to optimize over it in an automatic way. In this paper, we propose a new learning-to-learn model and some useful and practical tricks. Our optimizer outperforms generic, hand-crafted optimization algorithms and state-of-the-art learning-to-learn optimizers by DeepMind in many tasks. We demonstrate the effectiveness of our algorithms on a number of tasks, including deep MLPs, CNNs, and simple LSTMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.03633v3-abstract-full').style.display = 'none'; document.getElementById('1703.03633v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICML 2017, 9 pages, 9 figures, 4 tables</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository