Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 77 results for author: <span class="mathjax">Ling, Y</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Ling%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ling, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ling%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ling, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Ling%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Ling%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Ling%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14003">arXiv:2501.14003</a> <span> [<a href="https://arxiv.org/pdf/2501.14003">pdf</a>, <a href="https://arxiv.org/format/2501.14003">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PaMMA-Net: Plasmas magnetic measurement evolution based on data-driven incremental accumulative prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yunfei Ling</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zijie Liu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+J">Jun Du</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yao Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuehang Wang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+B">Bingjia Xiao</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+X">Xin Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14003v1-abstract-short" style="display: inline;"> An accurate evolution model is crucial for effective control and in-depth study of fusion plasmas. Evolution methods based on physical models often encounter challenges such as insufficient robustness or excessive computational costs. Given the proven strong fitting capabilities of deep learning methods across various fields, including plasma research, this paper introduces a deep learning-based m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14003v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14003v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14003v1-abstract-full" style="display: none;"> An accurate evolution model is crucial for effective control and in-depth study of fusion plasmas. Evolution methods based on physical models often encounter challenges such as insufficient robustness or excessive computational costs. Given the proven strong fitting capabilities of deep learning methods across various fields, including plasma research, this paper introduces a deep learning-based magnetic measurement evolution method named PaMMA-Net (Plasma Magnetic Measurements Incremental Accumulative Prediction Network). This network is capable of evolving magnetic measurements in tokamak discharge experiments over extended periods or, in conjunction with equilibrium reconstruction algorithms, evolving macroscopic parameters such as plasma shape. Leveraging a incremental prediction approach and data augmentation techniques tailored for magnetic measurements, PaMMA-Net achieves superior evolution results compared to existing studies. The tests conducted on real experimental data from EAST validate the high generalization capability of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14003v1-abstract-full').style.display = 'none'; document.getElementById('2501.14003v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04744">arXiv:2501.04744</a> <span> [<a href="https://arxiv.org/pdf/2501.04744">pdf</a>, <a href="https://arxiv.org/format/2501.04744">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Exact computation of the color function for triangular element interfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pan%2C+J">Jieyun Pan</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+D+K">D茅sir-Andr茅 Koffi Bi</a>, <a href="/search/cs?searchtype=author&query=Kottilingal%2C+A+B">Ahmed Basil Kottilingal</a>, <a href="/search/cs?searchtype=author&query=Costanzo%2C+S">Serena Costanzo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiacai Lu</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yue Ling</a>, <a href="/search/cs?searchtype=author&query=Scardovelli%2C+R">Ruben Scardovelli</a>, <a href="/search/cs?searchtype=author&query=Tryggvason%2C+G">Gr茅tar Tryggvason</a>, <a href="/search/cs?searchtype=author&query=Zaleski%2C+S">St茅phane Zaleski</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.04744v1-abstract-short" style="display: inline;"> The calculation of the volume enclosed by curved surfaces discretized into triangular elements, and a cube is of great importance in different domains, such as computer graphics and multiphase flow simulations. We propose a robust algorithm, the Front2VOF (F2V) algorithm, to address this problem. The F2V algorithm consists of two main steps. First, it identifies the polygons within the cube by seg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04744v1-abstract-full').style.display = 'inline'; document.getElementById('2501.04744v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.04744v1-abstract-full" style="display: none;"> The calculation of the volume enclosed by curved surfaces discretized into triangular elements, and a cube is of great importance in different domains, such as computer graphics and multiphase flow simulations. We propose a robust algorithm, the Front2VOF (F2V) algorithm, to address this problem. The F2V algorithm consists of two main steps. First, it identifies the polygons within the cube by segmenting the triangular elements on the surface, retaining only the portions inside the cube boundaries. Second, it computes the volume enclosed by these polygons in combination with the cube faces. To validate the algorithm's accuracy and robustness, we tested it using a range of synthetic configurations with known analytical solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04744v1-abstract-full').style.display = 'none'; document.getElementById('2501.04744v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00510">arXiv:2501.00510</a> <span> [<a href="https://arxiv.org/pdf/2501.00510">pdf</a>, <a href="https://arxiv.org/format/2501.00510">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> VinT-6D: A Large-Scale Object-in-hand Dataset from Vision, Touch and Proprioception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+Z">Zhaoliang Wan</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yonggen Ling</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+S">Senlin Yi</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+L">Lu Qi</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+W">Wangwei Lee</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+M">Minglei Lu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Sicheng Yang</a>, <a href="/search/cs?searchtype=author&query=Teng%2C+X">Xiao Teng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+P">Peng Lu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xu Yang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Ming-Hsuan Yang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+H">Hui Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00510v2-abstract-short" style="display: inline;"> This paper addresses the scarcity of large-scale datasets for accurate object-in-hand pose estimation, which is crucial for robotic in-hand manipulation within the ``Perception-Planning-Control" paradigm. Specifically, we introduce VinT-6D, the first extensive multi-modal dataset integrating vision, touch, and proprioception, to enhance robotic manipulation. VinT-6D comprises 2 million VinT-Sim an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00510v2-abstract-full').style.display = 'inline'; document.getElementById('2501.00510v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00510v2-abstract-full" style="display: none;"> This paper addresses the scarcity of large-scale datasets for accurate object-in-hand pose estimation, which is crucial for robotic in-hand manipulation within the ``Perception-Planning-Control" paradigm. Specifically, we introduce VinT-6D, the first extensive multi-modal dataset integrating vision, touch, and proprioception, to enhance robotic manipulation. VinT-6D comprises 2 million VinT-Sim and 0.1 million VinT-Real splits, collected via simulations in MuJoCo and Blender and a custom-designed real-world platform. This dataset is tailored for robotic hands, offering models with whole-hand tactile perception and high-quality, well-aligned data. To the best of our knowledge, the VinT-Real is the largest considering the collection difficulties in the real-world environment so that it can bridge the gap of simulation to real compared to the previous works. Built upon VinT-6D, we present a benchmark method that shows significant improvements in performance by fusing multi-modal information. The project is available at https://VinT-6D.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00510v2-abstract-full').style.display = 'none'; document.getElementById('2501.00510v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10137">arXiv:2412.10137</a> <span> [<a href="https://arxiv.org/pdf/2412.10137">pdf</a>, <a href="https://arxiv.org/format/2412.10137">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Constraint-Aware Zero-Shot Vision-Language Navigation in Continuous Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kehan Chen</a>, <a href="/search/cs?searchtype=author&query=An%2C+D">Dong An</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yan Huang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Rongtao Xu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yifei Su</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yonggen Ling</a>, <a href="/search/cs?searchtype=author&query=Reid%2C+I">Ian Reid</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.10137v1-abstract-short" style="display: inline;"> We address the task of Vision-Language Navigation in Continuous Environments (VLN-CE) under the zero-shot setting. Zero-shot VLN-CE is particularly challenging due to the absence of expert demonstrations for training and minimal environment structural prior to guide navigation. To confront these challenges, we propose a Constraint-Aware Navigator (CA-Nav), which reframes zero-shot VLN-CE as a sequ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10137v1-abstract-full').style.display = 'inline'; document.getElementById('2412.10137v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.10137v1-abstract-full" style="display: none;"> We address the task of Vision-Language Navigation in Continuous Environments (VLN-CE) under the zero-shot setting. Zero-shot VLN-CE is particularly challenging due to the absence of expert demonstrations for training and minimal environment structural prior to guide navigation. To confront these challenges, we propose a Constraint-Aware Navigator (CA-Nav), which reframes zero-shot VLN-CE as a sequential, constraint-aware sub-instruction completion process. CA-Nav continuously translates sub-instructions into navigation plans using two core modules: the Constraint-Aware Sub-instruction Manager (CSM) and the Constraint-Aware Value Mapper (CVM). CSM defines the completion criteria for decomposed sub-instructions as constraints and tracks navigation progress by switching sub-instructions in a constraint-aware manner. CVM, guided by CSM's constraints, generates a value map on the fly and refines it using superpixel clustering to improve navigation stability. CA-Nav achieves the state-of-the-art performance on two VLN-CE benchmarks, surpassing the previous best method by 12 percent and 13 percent in Success Rate on the validation unseen splits of R2R-CE and RxR-CE, respectively. Moreover, CA-Nav demonstrates its effectiveness in real-world robot deployments across various indoor scenes and instructions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10137v1-abstract-full').style.display = 'none'; document.getElementById('2412.10137v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17045">arXiv:2411.17045</a> <span> [<a href="https://arxiv.org/pdf/2411.17045">pdf</a>, <a href="https://arxiv.org/format/2411.17045">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Redefining Crowdsourced Test Report Prioritization: An Innovative Approach with Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuchen Ling</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shengcheng Yu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+G">Guobin Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jia Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17045v1-abstract-short" style="display: inline;"> Context: Crowdsourced testing has gained popularity in software testing, especially for mobile app testing, due to its ability to bring diversity and tackle fragmentation issues. However, the openness of crowdsourced testing presents challenges, particularly in the manual review of numerous test reports, which is time-consuming and labor-intensive. Objective: The primary goal of this research is t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17045v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17045v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17045v1-abstract-full" style="display: none;"> Context: Crowdsourced testing has gained popularity in software testing, especially for mobile app testing, due to its ability to bring diversity and tackle fragmentation issues. However, the openness of crowdsourced testing presents challenges, particularly in the manual review of numerous test reports, which is time-consuming and labor-intensive. Objective: The primary goal of this research is to improve the efficiency of review processes in crowdsourced testing. Traditional approaches to test report prioritization lack a deep understanding of semantic information in textual descriptions of these reports. This paper introduces LLMPrior, a novel approach for prioritizing crowdsourced test reports using large language models (LLMs). Method: LLMPrior leverages LLMs for the analysis and clustering of crowdsourced test reports based on the types of bugs revealed in their textual descriptions. This involves using prompt engineering techniques to enhance the performance of LLMs. Following the clustering, a recurrent selection algorithm is applied to prioritize the reports. Results: Empirical experiments are conducted to evaluate the effectiveness of LLMPrior. The findings indicate that LLMPrior not only surpasses current state-of-the-art approaches in terms of performance but also proves to be more feasible, efficient, and reliable. This success is attributed to the use of prompt engineering techniques and the cluster-based prioritization strategy. Conclusion: LLMPrior represents a significant advancement in crowdsourced test report prioritization. By effectively utilizing large language models and a cluster-based strategy, it addresses the challenges in traditional prioritization approaches, offering a more efficient and reliable solution for app developers dealing with crowdsourced test reports. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17045v1-abstract-full').style.display = 'none'; document.getElementById('2411.17045v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Information and Software Technology in Nov 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22983">arXiv:2410.22983</a> <span> [<a href="https://arxiv.org/pdf/2410.22983">pdf</a>, <a href="https://arxiv.org/format/2410.22983">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3664647.3680677">10.1145/3664647.3680677 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dual-Optimized Adaptive Graph Reconstruction for Multi-View Graph Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zichen Wen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tianyi Wu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yazhou Ren</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yawen Ling</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Chenhang Cui</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+X">Xiaorong Pu</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lifang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22983v1-abstract-short" style="display: inline;"> Multi-view clustering is an important machine learning task for multi-media data, encompassing various domains such as images, videos, and texts. Moreover, with the growing abundance of graph data, the significance of multi-view graph clustering (MVGC) has become evident. Most existing methods focus on graph neural networks (GNNs) to extract information from both graph structure and feature data t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22983v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22983v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22983v1-abstract-full" style="display: none;"> Multi-view clustering is an important machine learning task for multi-media data, encompassing various domains such as images, videos, and texts. Moreover, with the growing abundance of graph data, the significance of multi-view graph clustering (MVGC) has become evident. Most existing methods focus on graph neural networks (GNNs) to extract information from both graph structure and feature data to learn distinguishable node representations. However, traditional GNNs are designed with the assumption of homophilous graphs, making them unsuitable for widely prevalent heterophilous graphs. Several techniques have been introduced to enhance GNNs for heterophilous graphs. While these methods partially mitigate the heterophilous graph issue, they often neglect the advantages of traditional GNNs, such as their simplicity, interpretability, and efficiency. In this paper, we propose a novel multi-view graph clustering method based on dual-optimized adaptive graph reconstruction, named DOAGC. It mainly aims to reconstruct the graph structure adapted to traditional GNNs to deal with heterophilous graph issues while maintaining the advantages of traditional GNNs. Specifically, we first develop an adaptive graph reconstruction mechanism that accounts for node correlation and original structural information. To further optimize the reconstruction graph, we design a dual optimization strategy and demonstrate the feasibility of our optimization strategy through mutual information theory. Numerous experiments demonstrate that DOAGC effectively mitigates the heterophilous graph problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22983v1-abstract-full').style.display = 'none'; document.getElementById('2410.22983v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16613">arXiv:2410.16613</a> <span> [<a href="https://arxiv.org/pdf/2410.16613">pdf</a>, <a href="https://arxiv.org/format/2410.16613">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.compbiomed.2024.109225">10.1016/j.compbiomed.2024.109225 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Real-time Sub-milliwatt Epilepsy Detection Implemented on a Spiking Neural Network Edge Inference Processor </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lia%2C+R">Ruixin Lia</a>, <a href="/search/cs?searchtype=author&query=Zhaoa%2C+G">Guoxu Zhaoa</a>, <a href="/search/cs?searchtype=author&query=Muir%2C+D+R">Dylan Richard Muir</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuya Ling</a>, <a href="/search/cs?searchtype=author&query=Burelo%2C+K">Karla Burelo</a>, <a href="/search/cs?searchtype=author&query=Khoei%2C+M">Mina Khoei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dong Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+Y">Yannan Xing</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+N">Ning Qiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16613v1-abstract-short" style="display: inline;"> Analyzing electroencephalogram (EEG) signals to detect the epileptic seizure status of a subject presents a challenge to existing technologies aimed at providing timely and efficient diagnosis. In this study, we aimed to detect interictal and ictal periods of epileptic seizures using a spiking neural network (SNN). Our proposed approach provides an online and real-time preliminary diagnosis of epi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16613v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16613v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16613v1-abstract-full" style="display: none;"> Analyzing electroencephalogram (EEG) signals to detect the epileptic seizure status of a subject presents a challenge to existing technologies aimed at providing timely and efficient diagnosis. In this study, we aimed to detect interictal and ictal periods of epileptic seizures using a spiking neural network (SNN). Our proposed approach provides an online and real-time preliminary diagnosis of epileptic seizures and helps to detect possible pathological conditions.To validate our approach, we conducted experiments using multiple datasets. We utilized a trained SNN to identify the presence of epileptic seizures and compared our results with those of related studies. The SNN model was deployed on Xylo, a digital SNN neuromorphic processor designed to process temporal signals. Xylo efficiently simulates spiking leaky integrate-and-fire neurons with exponential input synapses. Xylo has much lower energy requirments than traditional approaches to signal processing, making it an ideal platform for developing low-power seizure detection systems.Our proposed method has a high test accuracy of 93.3% and 92.9% when classifying ictal and interictal periods. At the same time, the application has an average power consumption of 87.4 uW(IO power) + 287.9 uW(computational power) when deployed to Xylo. Our method demonstrates excellent low-latency performance when tested on multiple datasets. Our work provides a new solution for seizure detection, and it is expected to be widely used in portable and wearable devices in the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16613v1-abstract-full').style.display = 'none'; document.getElementById('2410.16613v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Computers in Biology and Medicine(2024), 183, 109225 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03596">arXiv:2410.03596</a> <span> [<a href="https://arxiv.org/pdf/2410.03596">pdf</a>, <a href="https://arxiv.org/format/2410.03596">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SiMilarity-Enhanced Homophily for Multi-View Heterophilous Graph Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jianpeng Chen</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yawen Ling</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yazhou Ren</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zichen Wen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tianyi Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shufei Zhang</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lifang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03596v1-abstract-short" style="display: inline;"> With the increasing prevalence of graph-structured data, multi-view graph clustering has been widely used in various downstream applications. Existing approaches primarily rely on a unified message passing mechanism, which significantly enhances clustering performance. Nevertheless, this mechanism limits its applicability to heterophilous situations, as it is fundamentally predicated on the assump… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03596v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03596v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03596v1-abstract-full" style="display: none;"> With the increasing prevalence of graph-structured data, multi-view graph clustering has been widely used in various downstream applications. Existing approaches primarily rely on a unified message passing mechanism, which significantly enhances clustering performance. Nevertheless, this mechanism limits its applicability to heterophilous situations, as it is fundamentally predicated on the assumption of homophily, i.e., the connected nodes often belong to the same class. In reality, this assumption does not always hold; a moderately or even mildly homophilous graph is more common than a fully homophilous one due to inevitable heterophilous information in the graph. To address this issue, in this paper, we propose a novel SiMilarity-enhanced Homophily for Multi-view Heterophilous Graph Clustering (SMHGC) approach. By analyzing the relationship between similarity and graph homophily, we propose to enhance the homophily by introducing three similarity terms, i.e., neighbor pattern similarity, node feature similarity, and multi-view global similarity, in a label-free manner. Then, a consensus-based inter- and intra-view fusion paradigm is proposed to fuse the improved homophilous graph from different views and utilize them for clustering. The state-of-the-art experimental results on both multi-view heterophilous and homophilous datasets collectively demonstrate the strong capacity of similarity for unsupervised multi-view heterophilous graph learning. Additionally, the consistent performance across semi-synthetic datasets with varying levels of homophily serves as further evidence of SMHGC's resilience to heterophily. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03596v1-abstract-full').style.display = 'none'; document.getElementById('2410.03596v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01223">arXiv:2409.01223</a> <span> [<a href="https://arxiv.org/pdf/2409.01223">pdf</a>, <a href="https://arxiv.org/format/2409.01223">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Exact Error Exponents of Concatenated Codes for DNA Storage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01223v1-abstract-short" style="display: inline;"> In this paper, we consider a concatenated coding based class of DNA storage codes in which the selected molecules are constrained to be taken from an ``inner'' codebook associated with the sequencing channel. This codebook is used in a ``black-box'' manner, and is only assumed to operate at an achievable rate in the sense of attaining asymptotically vanishing maximal (inner) error probability. We… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01223v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01223v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01223v1-abstract-full" style="display: none;"> In this paper, we consider a concatenated coding based class of DNA storage codes in which the selected molecules are constrained to be taken from an ``inner'' codebook associated with the sequencing channel. This codebook is used in a ``black-box'' manner, and is only assumed to operate at an achievable rate in the sense of attaining asymptotically vanishing maximal (inner) error probability. We first derive the exact error exponent in a widely-studied regime of constant rate and a linear number of sequencing reads, and show strict improvements over an existing achievable error exponent. Moreover, our achievability analysis is based on a coded-index strategy, implying that such strategies attain the highest error exponents within the broader class of codes that we consider. We then extend our results to other scaling regimes, including a super-linear number of reads, as well as several certain low-rate regimes. We find that the latter comes with notable intricacies, such as the suboptimality of codewords with all distinct molecules, and certain dependencies of the error exponents on the model for sequencing errors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01223v1-abstract-full').style.display = 'none'; document.getElementById('2409.01223v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06984">arXiv:2407.06984</a> <span> [<a href="https://arxiv.org/pdf/2407.06984">pdf</a>, <a href="https://arxiv.org/format/2407.06984">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Category-level Object Detection, Pose Estimation and Reconstruction from Stereo Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chuanrui Zhang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yonggen Ling</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+M">Minglei Lu</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+M">Minghan Qin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoqian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06984v2-abstract-short" style="display: inline;"> We study the 3D object understanding task for manipulating everyday objects with different material properties (diffuse, specular, transparent and mixed). Existing monocular and RGB-D methods suffer from scale ambiguity due to missing or imprecise depth measurements. We present CODERS, a one-stage approach for Category-level Object Detection, pose Estimation and Reconstruction from Stereo images.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06984v2-abstract-full').style.display = 'inline'; document.getElementById('2407.06984v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06984v2-abstract-full" style="display: none;"> We study the 3D object understanding task for manipulating everyday objects with different material properties (diffuse, specular, transparent and mixed). Existing monocular and RGB-D methods suffer from scale ambiguity due to missing or imprecise depth measurements. We present CODERS, a one-stage approach for Category-level Object Detection, pose Estimation and Reconstruction from Stereo images. The base of our pipeline is an implicit stereo matching module that combines stereo image features with 3D position information. Concatenating this presented module and the following transform-decoder architecture leads to end-to-end learning of multiple tasks required by robot manipulation. Our approach significantly outperforms all competing methods in the public TOD dataset. Furthermore, trained on simulated data, CODERS generalize well to unseen category-level object instances in real-world robot manipulation experiments. Our dataset, code, and demos will be available on our project page. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06984v2-abstract-full').style.display = 'none'; document.getElementById('2407.06984v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06196">arXiv:2407.06196</a> <span> [<a href="https://arxiv.org/pdf/2407.06196">pdf</a>, <a href="https://arxiv.org/format/2407.06196">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Poetry2Image: An Iterative Correction Framework for Images Generated from Chinese Classical Poetry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jing Jiang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yiran Ling</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Binzhu Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pengxiang Li</a>, <a href="/search/cs?searchtype=author&query=Piao%2C+J">Junming Piao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06196v1-abstract-short" style="display: inline;"> Text-to-image generation models often struggle with key element loss or semantic confusion in tasks involving Chinese classical poetry.Addressing this issue through fine-tuning models needs considerable training costs. Additionally, manual prompts for re-diffusion adjustments need professional knowledge. To solve this problem, we propose Poetry2Image, an iterative correction framework for images g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06196v1-abstract-full').style.display = 'inline'; document.getElementById('2407.06196v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06196v1-abstract-full" style="display: none;"> Text-to-image generation models often struggle with key element loss or semantic confusion in tasks involving Chinese classical poetry.Addressing this issue through fine-tuning models needs considerable training costs. Additionally, manual prompts for re-diffusion adjustments need professional knowledge. To solve this problem, we propose Poetry2Image, an iterative correction framework for images generated from Chinese classical poetry. Utilizing an external poetry dataset, Poetry2Image establishes an automated feedback and correction loop, which enhances the alignment between poetry and image through image generation models and subsequent re-diffusion modifications suggested by large language models (LLM). Using a test set of 200 sentences of Chinese classical poetry, the proposed method--when integrated with five popular image generation models--achieves an average element completeness of 70.63%, representing an improvement of 25.56% over direct image generation. In tests of semantic correctness, our method attains an average semantic consistency of 80.09%. The study not only promotes the dissemination of ancient poetry culture but also offers a reference for similar non-fine-tuning methods to enhance LLM generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06196v1-abstract-full').style.display = 'none'; document.getElementById('2407.06196v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05415">arXiv:2407.05415</a> <span> [<a href="https://arxiv.org/pdf/2407.05415">pdf</a>, <a href="https://arxiv.org/format/2407.05415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DIVESPOT: Depth Integrated Volume Estimation of Pile of Things Based on Point Cloud </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yiran Ling</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+R">Rongqiang Zhao</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yixuan Shen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dongbo Li</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+J">Jing Jin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jie Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05415v1-abstract-short" style="display: inline;"> Non-contact volume estimation of pile-type objects has considerable potential in industrial scenarios, including grain, coal, mining, and stone materials. However, using existing method for these scenarios is challenged by unstable measurement poses, significant light interference, the difficulty of training data collection, and the computational burden brought by large piles. To address the above… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05415v1-abstract-full').style.display = 'inline'; document.getElementById('2407.05415v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05415v1-abstract-full" style="display: none;"> Non-contact volume estimation of pile-type objects has considerable potential in industrial scenarios, including grain, coal, mining, and stone materials. However, using existing method for these scenarios is challenged by unstable measurement poses, significant light interference, the difficulty of training data collection, and the computational burden brought by large piles. To address the above issues, we propose the Depth Integrated Volume EStimation of Pile Of Things (DIVESPOT) based on point cloud technology in this study. For the challenges of unstable measurement poses, the point cloud pose correction and filtering algorithm is designed based on the Random Sample Consensus (RANSAC) and the Hierarchical Density-Based Spatial Clustering of Applications with Noise (HDBSCAN). To cope with light interference and to avoid the relying on training data, the height-distribution-based ground feature extraction algorithm is proposed to achieve RGB-independent. To reduce the computational burden, the storage space optimizing strategy is developed, such that accurate estimation can be acquired by using compressed voxels. Experimental results demonstrate that the DIVESPOT method enables non-data-driven, RGB-independent segmentation of pile point clouds, maintaining a volume calculation relative error within 2%. Even with 90% compression of the voxel mesh, the average error of the results can be under 3%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05415v1-abstract-full').style.display = 'none'; document.getElementById('2407.05415v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10521">arXiv:2406.10521</a> <span> [<a href="https://arxiv.org/pdf/2406.10521">pdf</a>, <a href="https://arxiv.org/format/2406.10521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MALLM-GAN: Multi-Agent Large Language Model as Generative Adversarial Network for Synthesizing Tabular Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yaobin Ling</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xiaoqian Jiang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yejin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10521v3-abstract-short" style="display: inline;"> In the era of big data, access to abundant data is crucial for driving research forward. However, such data is often inaccessible due to privacy concerns or high costs, particularly in healthcare domain. Generating synthetic (tabular) data can address this, but existing models typically require substantial amounts of data to train effectively, contradicting our objective to solve data scarcity. To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10521v3-abstract-full').style.display = 'inline'; document.getElementById('2406.10521v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10521v3-abstract-full" style="display: none;"> In the era of big data, access to abundant data is crucial for driving research forward. However, such data is often inaccessible due to privacy concerns or high costs, particularly in healthcare domain. Generating synthetic (tabular) data can address this, but existing models typically require substantial amounts of data to train effectively, contradicting our objective to solve data scarcity. To address this challenge, we propose a novel framework to generate synthetic tabular data, powered by large language models (LLMs) that emulates the architecture of a Generative Adversarial Network (GAN). By incorporating data generation process as contextual information and utilizing LLM as the optimizer, our approach significantly enhance the quality of synthetic data generation in common scenarios with small sample sizes. Our experimental results on public and private datasets demonstrate that our model outperforms several state-of-art models regarding generating higher quality synthetic data for downstream tasks while keeping privacy of the real data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10521v3-abstract-full').style.display = 'none'; document.getElementById('2406.10521v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14934">arXiv:2404.14934</a> <span> [<a href="https://arxiv.org/pdf/2404.14934">pdf</a>, <a href="https://arxiv.org/format/2404.14934">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> G3R: Generating Rich and Fine-grained mmWave Radar Data from 2D Videos for Generalized Gesture Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+K">Kaikai Deng</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Dong Zhao</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+W">Wenxin Zheng</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yue Ling</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+K">Kangwen Yin</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+H">Huadong Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.14934v1-abstract-short" style="display: inline;"> Millimeter wave radar is gaining traction recently as a promising modality for enabling pervasive and privacy-preserving gesture recognition. However, the lack of rich and fine-grained radar datasets hinders progress in developing generalized deep learning models for gesture recognition across various user postures (e.g., standing, sitting), positions, and scenes. To remedy this, we resort to desi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14934v1-abstract-full').style.display = 'inline'; document.getElementById('2404.14934v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.14934v1-abstract-full" style="display: none;"> Millimeter wave radar is gaining traction recently as a promising modality for enabling pervasive and privacy-preserving gesture recognition. However, the lack of rich and fine-grained radar datasets hinders progress in developing generalized deep learning models for gesture recognition across various user postures (e.g., standing, sitting), positions, and scenes. To remedy this, we resort to designing a software pipeline that exploits wealthy 2D videos to generate realistic radar data, but it needs to address the challenge of simulating diversified and fine-grained reflection properties of user gestures. To this end, we design G3R with three key components: (i) a gesture reflection point generator expands the arm's skeleton points to form human reflection points; (ii) a signal simulation model simulates the multipath reflection and attenuation of radar signals to output the human intensity map; (iii) an encoder-decoder model combines a sampling module and a fitting module to address the differences in number and distribution of points between generated and real-world radar data for generating realistic radar data. We implement and evaluate G3R using 2D videos from public data sources and self-collected real-world radar data, demonstrating its superiority over other state-of-the-art approaches for gesture recognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14934v1-abstract-full').style.display = 'none'; document.getElementById('2404.14934v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 29 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10777">arXiv:2404.10777</a> <span> [<a href="https://arxiv.org/pdf/2404.10777">pdf</a>, <a href="https://arxiv.org/format/2404.10777">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Divide-Conquer-and-Merge: Memory- and Time-Efficient Holographic Displays </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zhenxing Dong</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+J">Jidong Jia</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yan Li</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuye Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10777v1-abstract-short" style="display: inline;"> Recently, deep learning-based computer-generated holography (CGH) has demonstrated tremendous potential in three-dimensional (3D) displays and yielded impressive display quality. However, most existing deep learning-based CGH techniques can only generate holograms of 1080p resolution, which is far from the ultra-high resolution (16K+) required for practical virtual reality (VR) and augmented reali… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10777v1-abstract-full').style.display = 'inline'; document.getElementById('2404.10777v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10777v1-abstract-full" style="display: none;"> Recently, deep learning-based computer-generated holography (CGH) has demonstrated tremendous potential in three-dimensional (3D) displays and yielded impressive display quality. However, most existing deep learning-based CGH techniques can only generate holograms of 1080p resolution, which is far from the ultra-high resolution (16K+) required for practical virtual reality (VR) and augmented reality (AR) applications to support a wide field of view and large eye box. One of the major obstacles in current CGH frameworks lies in the limited memory available on consumer-grade GPUs which could not facilitate the generation of higher-definition holograms. To overcome the aforementioned challenge, we proposed a divide-conquer-and-merge strategy to address the memory and computational capacity scarcity in ultra-high-definition CGH generation. This algorithm empowers existing CGH frameworks to synthesize higher-definition holograms at a faster speed while maintaining high-fidelity image display quality. Both simulations and experiments were conducted to demonstrate the capabilities of the proposed framework. By integrating our strategy into HoloNet and CCNNs, we achieved significant reductions in GPU memory usage during the training period by 64.3\% and 12.9\%, respectively. Furthermore, we observed substantial speed improvements in hologram generation, with an acceleration of up to 3$\times$ and 2 $\times$, respectively. Particularly, we successfully trained and inferred 8K definition holograms on an NVIDIA GeForce RTX 3090 GPU for the first time in simulations. Furthermore, we conducted full-color optical experiments to verify the effectiveness of our method. We believe our strategy can provide a novel approach for memory- and time-efficient holographic displays. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10777v1-abstract-full').style.display = 'none'; document.getElementById('2404.10777v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted as conference paper in IEEE VR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01162">arXiv:2403.01162</a> <span> [<a href="https://arxiv.org/pdf/2403.01162">pdf</a>, <a href="https://arxiv.org/ps/2403.01162">ps</a>, <a href="https://arxiv.org/format/2403.01162">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.orl.2024.107103">10.1016/j.orl.2024.107103 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Envy-Free House Allocation with Minimum Subsidy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Choo%2C+D">Davin Choo</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Suksompong%2C+W">Warut Suksompong</a>, <a href="/search/cs?searchtype=author&query=Teh%2C+N">Nicholas Teh</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.01162v1-abstract-short" style="display: inline;"> House allocation refers to the problem where $m$ houses are to be allocated to $n$ agents so that each agent receives one house. Since an envy-free house allocation does not always exist, we consider finding such an allocation in the presence of subsidy. We show that computing an envy-free allocation with minimum subsidy is NP-hard in general, but can be done efficiently if $m$ differs from $n$ by… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01162v1-abstract-full').style.display = 'inline'; document.getElementById('2403.01162v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.01162v1-abstract-full" style="display: none;"> House allocation refers to the problem where $m$ houses are to be allocated to $n$ agents so that each agent receives one house. Since an envy-free house allocation does not always exist, we consider finding such an allocation in the presence of subsidy. We show that computing an envy-free allocation with minimum subsidy is NP-hard in general, but can be done efficiently if $m$ differs from $n$ by an additive constant or if the agents have identical utilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.01162v1-abstract-full').style.display = 'none'; document.getElementById('2403.01162v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Operations Research Letters, 54:107103 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.02682">arXiv:2401.02682</a> <span> [<a href="https://arxiv.org/pdf/2401.02682">pdf</a>, <a href="https://arxiv.org/format/2401.02682">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Homophily-Related: Adaptive Hybrid Graph Filter for Multi-View Graph Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+Z">Zichen Wen</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yawen Ling</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yazhou Ren</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tianyi Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jianpeng Chen</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+X">Xiaorong Pu</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+Z">Zhifeng Hao</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lifang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.02682v1-abstract-short" style="display: inline;"> Recently there is a growing focus on graph data, and multi-view graph clustering has become a popular area of research interest. Most of the existing methods are only applicable to homophilous graphs, yet the extensive real-world graph data can hardly fulfill the homophily assumption, where the connected nodes tend to belong to the same class. Several studies have pointed out that the poor perform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02682v1-abstract-full').style.display = 'inline'; document.getElementById('2401.02682v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.02682v1-abstract-full" style="display: none;"> Recently there is a growing focus on graph data, and multi-view graph clustering has become a popular area of research interest. Most of the existing methods are only applicable to homophilous graphs, yet the extensive real-world graph data can hardly fulfill the homophily assumption, where the connected nodes tend to belong to the same class. Several studies have pointed out that the poor performance on heterophilous graphs is actually due to the fact that conventional graph neural networks (GNNs), which are essentially low-pass filters, discard information other than the low-frequency information on the graph. Nevertheless, on certain graphs, particularly heterophilous ones, neglecting high-frequency information and focusing solely on low-frequency information impedes the learning of node representations. To break this limitation, our motivation is to perform graph filtering that is closely related to the homophily degree of the given graph, with the aim of fully leveraging both low-frequency and high-frequency signals to learn distinguishable node embedding. In this work, we propose Adaptive Hybrid Graph Filter for Multi-View Graph Clustering (AHGFC). Specifically, a graph joint process and graph joint aggregation matrix are first designed by using the intrinsic node features and adjacency relationship, which makes the low and high-frequency signals on the graph more distinguishable. Then we design an adaptive hybrid graph filter that is related to the homophily degree, which learns the node embedding based on the graph joint aggregation matrix. After that, the node embedding of each view is weighted and fused into a consensus embedding for the downstream task. Experimental results show that our proposed model performs well on six datasets containing homophilous and heterophilous graphs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02682v1-abstract-full').style.display = 'none'; document.getElementById('2401.02682v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10655">arXiv:2312.10655</a> <span> [<a href="https://arxiv.org/pdf/2312.10655">pdf</a>, <a href="https://arxiv.org/format/2312.10655">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Practical Non-Intrusive GUI Exploration Testing with Visual-based Robotic Arms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shengcheng Yu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+M">Mingzhe Du</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuchen Ling</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhenyu Chen</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhendong Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10655v1-abstract-short" style="display: inline;"> GUI testing is significant in the SE community. Most existing frameworks are intrusive and only support some specific platforms. With the development of distinct scenarios, diverse embedded systems or customized operating systems on different devices do not support existing intrusive GUI testing frameworks. Some approaches adopt robotic arms to replace the interface invoking of mobile apps under t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10655v1-abstract-full').style.display = 'inline'; document.getElementById('2312.10655v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10655v1-abstract-full" style="display: none;"> GUI testing is significant in the SE community. Most existing frameworks are intrusive and only support some specific platforms. With the development of distinct scenarios, diverse embedded systems or customized operating systems on different devices do not support existing intrusive GUI testing frameworks. Some approaches adopt robotic arms to replace the interface invoking of mobile apps under test and use computer vision technologies to identify GUI elements. However, some challenges are unsolved. First, existing approaches assume that GUI screens are fixed so that they cannot be adapted to diverse systems with different screen conditions. Second, existing approaches use XY-plane robotic arms, which cannot flexibly simulate testing operations. Third, existing approaches ignore compatibility bugs and only focus on crash bugs. A more practical approach is required for the non-intrusive scenario. We propose a practical non-intrusive GUI testing framework with visual robotic arms. RoboTest integrates novel GUI screen and widget detection algorithms, adaptive to detecting screens of different sizes and then to extracting GUI widgets from the detected screens. Then, a set of testing operations is applied with a 4-DOF robotic arm, which effectively and flexibly simulates human testing operations. During app exploration, RoboTest integrates the Principle of Proximity-guided exploration strategy, choosing close widgets of the previous targets to reduce robotic arm movement overhead and improve exploration efficiency. RoboTest can effectively detect some compatibility bugs beyond crash bugs with a GUI comparison on different devices of the same test operations. We evaluate RoboTest with 20 mobile apps, with a case study on an embedded system. The results show that RoboTest can effectively, efficiently, and generally explore AUTs to find bugs and reduce exploration time overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10655v1-abstract-full').style.display = 'none'; document.getElementById('2312.10655v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 46th International Conference on Software Engineering (ICSE 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.14251">arXiv:2311.14251</a> <span> [<a href="https://arxiv.org/pdf/2311.14251">pdf</a>, <a href="https://arxiv.org/ps/2311.14251">ps</a>, <a href="https://arxiv.org/format/2311.14251">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Optimal 1-bit Error Exponent for 2-hop Relaying with Binary-Input Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.14251v2-abstract-short" style="display: inline;"> In this paper, we study the problem of relaying a single bit over a tandem of binary-input channels, with the goal of attaining the highest possible error exponent in the exponentially decaying error probability. Our previous work gave an exact characterization of the best possible error exponent in various special cases, including when the two channels are identical, but the general case was left… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14251v2-abstract-full').style.display = 'inline'; document.getElementById('2311.14251v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.14251v2-abstract-full" style="display: none;"> In this paper, we study the problem of relaying a single bit over a tandem of binary-input channels, with the goal of attaining the highest possible error exponent in the exponentially decaying error probability. Our previous work gave an exact characterization of the best possible error exponent in various special cases, including when the two channels are identical, but the general case was left as an open problem. We resolve this open problem by deriving a new converse bound that matches our existing achievability bound. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14251v2-abstract-full').style.display = 'none'; document.getElementById('2311.14251v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Information Theory</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.20544">arXiv:2310.20544</a> <span> [<a href="https://arxiv.org/pdf/2310.20544">pdf</a>, <a href="https://arxiv.org/format/2310.20544">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Fluid Dynamics">physics.flu-dyn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chaotic Dynamics">nlin.CD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Information-theoretic causality and applications to turbulence: energy cascade and inner/outer layer interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lozano-Dur%C3%A1n%2C+A">Adri谩n Lozano-Dur谩n</a>, <a href="/search/cs?searchtype=author&query=Arranz%2C+G">Gonzalo Arranz</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuenong Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.20544v1-abstract-short" style="display: inline;"> We introduce an information-theoretic method for quantifying causality in chaotic systems. The approach, referred to as IT-causality, quantifies causality by measuring the information gained about future events conditioned on the knowledge of past events. The causal interactions are classified into redundant, unique, and synergistic contributions depending on their nature. The formulation is non-i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20544v1-abstract-full').style.display = 'inline'; document.getElementById('2310.20544v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.20544v1-abstract-full" style="display: none;"> We introduce an information-theoretic method for quantifying causality in chaotic systems. The approach, referred to as IT-causality, quantifies causality by measuring the information gained about future events conditioned on the knowledge of past events. The causal interactions are classified into redundant, unique, and synergistic contributions depending on their nature. The formulation is non-intrusive, invariance under invertible transformations of the variables, and provides the missing causality due to unobserved variables. The method only requires pairs of past-future events of the quantities of interest, making it convenient for both computational simulations and experimental investigations. IT-causality is validated in four scenarios representing basic causal interactions among variables: mediator, confounder, redundant collider, and synergistic collider. The approach is leveraged to address two questions relevant to turbulence research: i) the scale locality of the energy cascade in isotropic turbulence, and ii) the interactions between inner and outer layer flow motions in wall-bounded turbulence. In the former case, we demonstrate that causality in the energy cascade flows sequentially from larger to smaller scales without requiring intermediate scales. Conversely, the flow of information from small to large scales is shown to be redundant. In the second problem, we observe a unidirectional causality flow, with causality predominantly originating from the outer layer and propagating towards the inner layer, but not vice versa. The decomposition of IT-causality into intensities also reveals that the causality is primarily associated with high-velocity streaks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20544v1-abstract-full').style.display = 'none'; document.getElementById('2310.20544v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15584">arXiv:2310.15584</a> <span> [<a href="https://arxiv.org/pdf/2310.15584">pdf</a>, <a href="https://arxiv.org/format/2310.15584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Accelerating Split Federated Learning over Wireless Communication Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+C">Ce Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jinxuan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuan Liu</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yushi Ling</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+M">Miaowen Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.15584v1-abstract-short" style="display: inline;"> The development of artificial intelligence (AI) provides opportunities for the promotion of deep neural network (DNN)-based applications. However, the large amount of parameters and computational complexity of DNN makes it difficult to deploy it on edge devices which are resource-constrained. An efficient method to address this challenge is model partition/splitting, in which DNN is divided into t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15584v1-abstract-full').style.display = 'inline'; document.getElementById('2310.15584v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15584v1-abstract-full" style="display: none;"> The development of artificial intelligence (AI) provides opportunities for the promotion of deep neural network (DNN)-based applications. However, the large amount of parameters and computational complexity of DNN makes it difficult to deploy it on edge devices which are resource-constrained. An efficient method to address this challenge is model partition/splitting, in which DNN is divided into two parts which are deployed on device and server respectively for co-training or co-inference. In this paper, we consider a split federated learning (SFL) framework that combines the parallel model training mechanism of federated learning (FL) and the model splitting structure of split learning (SL). We consider a practical scenario of heterogeneous devices with individual split points of DNN. We formulate a joint problem of split point selection and bandwidth allocation to minimize the system latency. By using alternating optimization, we decompose the problem into two sub-problems and solve them optimally. Experiment results demonstrate the superiority of our work in latency reduction and accuracy improvement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15584v1-abstract-full').style.display = 'none'; document.getElementById('2310.15584v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.01361">arXiv:2310.01361</a> <span> [<a href="https://arxiv.org/pdf/2310.01361">pdf</a>, <a href="https://arxiv.org/format/2310.01361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> GenSim: Generating Robotic Simulation Tasks via Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lirui Wang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yiyang Ling</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Zhecheng Yuan</a>, <a href="/search/cs?searchtype=author&query=Shridhar%2C+M">Mohit Shridhar</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+C">Chen Bao</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">Yuzhe Qin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bailin Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Huazhe Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaolong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.01361v2-abstract-short" style="display: inline;"> Collecting large amounts of real-world interaction data to train general robotic policies is often prohibitively expensive, thus motivating the use of simulation data. However, existing methods for data generation have generally focused on scene-level diversity (e.g., object instances and poses) rather than task-level diversity, due to the human effort required to come up with and verify novel tas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.01361v2-abstract-full').style.display = 'inline'; document.getElementById('2310.01361v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.01361v2-abstract-full" style="display: none;"> Collecting large amounts of real-world interaction data to train general robotic policies is often prohibitively expensive, thus motivating the use of simulation data. However, existing methods for data generation have generally focused on scene-level diversity (e.g., object instances and poses) rather than task-level diversity, due to the human effort required to come up with and verify novel tasks. This has made it challenging for policies trained on simulation data to demonstrate significant task-level generalization. In this paper, we propose to automatically generate rich simulation environments and expert demonstrations by exploiting a large language models' (LLM) grounding and coding ability. Our approach, dubbed GenSim, has two modes: goal-directed generation, wherein a target task is given to the LLM and the LLM proposes a task curriculum to solve the target task, and exploratory generation, wherein the LLM bootstraps from previous tasks and iteratively proposes novel tasks that would be helpful in solving more complex tasks. We use GPT4 to expand the existing benchmark by ten times to over 100 tasks, on which we conduct supervised finetuning and evaluate several LLMs including finetuned GPTs and Code Llama on code generation for robotic simulation tasks. Furthermore, we observe that LLMs-generated simulation programs can enhance task-level generalization significantly when used for multitask policy training. We further find that with minimal sim-to-real adaptation, the multitask policies pretrained on GPT4-generated simulation tasks exhibit stronger transfer to unseen long-horizon tasks in the real world and outperform baselines by 25%. See the project website (https://liruiw.github.io/gensim) for code, demos, and videos. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.01361v2-abstract-full').style.display = 'none'; document.getElementById('2310.01361v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">See our project website (https://liruiw.github.io/gensim), demo and datasets (https://huggingface.co/spaces/Gen-Sim/Gen-Sim), and code (https://github.com/liruiw/GenSim) for more details</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> International Conference on Learning Representations (ICLR), 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.13574">arXiv:2309.13574</a> <span> [<a href="https://arxiv.org/pdf/2309.13574">pdf</a>, <a href="https://arxiv.org/ps/2309.13574">ps</a>, <a href="https://arxiv.org/format/2309.13574">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> LLM for Test Script Generation and Migration: Challenges, Capabilities, and Opportunities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shengcheng Yu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuchen Ling</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chentian Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhenyu Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.13574v1-abstract-short" style="display: inline;"> This paper investigates the application of large language models (LLM) in the domain of mobile application test script generation. Test script generation is a vital component of software testing, enabling efficient and reliable automation of repetitive test tasks. However, existing generation approaches often encounter limitations, such as difficulties in accurately capturing and reproducing test… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13574v1-abstract-full').style.display = 'inline'; document.getElementById('2309.13574v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.13574v1-abstract-full" style="display: none;"> This paper investigates the application of large language models (LLM) in the domain of mobile application test script generation. Test script generation is a vital component of software testing, enabling efficient and reliable automation of repetitive test tasks. However, existing generation approaches often encounter limitations, such as difficulties in accurately capturing and reproducing test scripts across diverse devices, platforms, and applications. These challenges arise due to differences in screen sizes, input modalities, platform behaviors, API inconsistencies, and application architectures. Overcoming these limitations is crucial for achieving robust and comprehensive test automation. By leveraging the capabilities of LLMs, we aim to address these challenges and explore its potential as a versatile tool for test automation. We investigate how well LLMs can adapt to diverse devices and systems while accurately capturing and generating test scripts. Additionally, we evaluate its cross-platform generation capabilities by assessing its ability to handle operating system variations and platform-specific behaviors. Furthermore, we explore the application of LLMs in cross-app migration, where it generates test scripts across different applications and software environments based on existing scripts. Throughout the investigation, we analyze its adaptability to various user interfaces, app architectures, and interaction patterns, ensuring accurate script generation and compatibility. The findings of this research contribute to the understanding of LLMs' capabilities in test automation. Ultimately, this research aims to enhance software testing practices, empowering app developers to achieve higher levels of software quality and development efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13574v1-abstract-full').style.display = 'none'; document.getElementById('2309.13574v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 23rd IEEE International Conference on Software Quality, Reliability, and Security (QRS 2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.03782">arXiv:2308.03782</a> <span> [<a href="https://arxiv.org/pdf/2308.03782">pdf</a>, <a href="https://arxiv.org/ps/2308.03782">ps</a>, <a href="https://arxiv.org/format/2308.03782">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bio+Clinical BERT, BERT Base, and CNN Performance Comparison for Predicting Drug-Review Satisfaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yue Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.03782v1-abstract-short" style="display: inline;"> The objective of this study is to develop natural language processing (NLP) models that can analyze patients' drug reviews and accurately classify their satisfaction levels as positive, neutral, or negative. Such models would reduce the workload of healthcare professionals and provide greater insight into patients' quality of life, which is a critical indicator of treatment effectiveness. To achie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.03782v1-abstract-full').style.display = 'inline'; document.getElementById('2308.03782v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.03782v1-abstract-full" style="display: none;"> The objective of this study is to develop natural language processing (NLP) models that can analyze patients' drug reviews and accurately classify their satisfaction levels as positive, neutral, or negative. Such models would reduce the workload of healthcare professionals and provide greater insight into patients' quality of life, which is a critical indicator of treatment effectiveness. To achieve this, we implemented and evaluated several classification models, including a BERT base model, Bio+Clinical BERT, and a simpler CNN. Results indicate that the medical domain-specific Bio+Clinical BERT model significantly outperformed the general domain base BERT model, achieving macro f1 and recall score improvement of 11%, as shown in Table 2. Future research could explore how to capitalize on the specific strengths of each model. Bio+Clinical BERT excels in overall performance, particularly with medical jargon, while the simpler CNN demonstrates the ability to identify crucial words and accurately classify sentiment in texts with conflicting sentiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.03782v1-abstract-full').style.display = 'none'; document.getElementById('2308.03782v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">KDD 2023 Workshop on Applied Data Science for Healthcare</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.11130">arXiv:2307.11130</a> <span> [<a href="https://arxiv.org/pdf/2307.11130">pdf</a>, <a href="https://arxiv.org/format/2307.11130">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Frequency-aware optical coherence tomography image super-resolution via conditional generative adversarial neural network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xueshen Li</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zhenxing Dong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongshan Liu</a>, <a href="/search/cs?searchtype=author&query=Kang-Mieler%2C+J+J">Jennifer J. Kang-Mieler</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuye Ling</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yu Gan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.11130v1-abstract-short" style="display: inline;"> Optical coherence tomography (OCT) has stimulated a wide range of medical image-based diagnosis and treatment in fields such as cardiology and ophthalmology. Such applications can be further facilitated by deep learning-based super-resolution technology, which improves the capability of resolving morphological structures. However, existing deep learning-based method only focuses on spatial distrib… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11130v1-abstract-full').style.display = 'inline'; document.getElementById('2307.11130v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.11130v1-abstract-full" style="display: none;"> Optical coherence tomography (OCT) has stimulated a wide range of medical image-based diagnosis and treatment in fields such as cardiology and ophthalmology. Such applications can be further facilitated by deep learning-based super-resolution technology, which improves the capability of resolving morphological structures. However, existing deep learning-based method only focuses on spatial distribution and disregard frequency fidelity in image reconstruction, leading to a frequency bias. To overcome this limitation, we propose a frequency-aware super-resolution framework that integrates three critical frequency-based modules (i.e., frequency transformation, frequency skip connection, and frequency alignment) and frequency-based loss function into a conditional generative adversarial network (cGAN). We conducted a large-scale quantitative study from an existing coronary OCT dataset to demonstrate the superiority of our proposed framework over existing deep learning frameworks. In addition, we confirmed the generalizability of our framework by applying it to fish corneal images and rat retinal images, demonstrating its capability to super-resolve morphological details in eye imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11130v1-abstract-full').style.display = 'none'; document.getElementById('2307.11130v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 7 figures, submitted to Biomedical Optics Express special issue</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.04231">arXiv:2307.04231</a> <span> [<a href="https://arxiv.org/pdf/2307.04231">pdf</a>, <a href="https://arxiv.org/format/2307.04231">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mx2M: Masked Cross-Modality Modeling in Domain Adaptation for 3D Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Boxiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zunran Wang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yonggen Ling</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+Y">Yuanyuan Guan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shenghao Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenhui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.04231v1-abstract-short" style="display: inline;"> Existing methods of cross-modal domain adaptation for 3D semantic segmentation predict results only via 2D-3D complementarity that is obtained by cross-modal feature matching. However, as lacking supervision in the target domain, the complementarity is not always reliable. The results are not ideal when the domain gap is large. To solve the problem of lacking supervision, we introduce masked model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04231v1-abstract-full').style.display = 'inline'; document.getElementById('2307.04231v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.04231v1-abstract-full" style="display: none;"> Existing methods of cross-modal domain adaptation for 3D semantic segmentation predict results only via 2D-3D complementarity that is obtained by cross-modal feature matching. However, as lacking supervision in the target domain, the complementarity is not always reliable. The results are not ideal when the domain gap is large. To solve the problem of lacking supervision, we introduce masked modeling into this task and propose a method Mx2M, which utilizes masked cross-modality modeling to reduce the large domain gap. Our Mx2M contains two components. One is the core solution, cross-modal removal and prediction (xMRP), which makes the Mx2M adapt to various scenarios and provides cross-modal self-supervision. The other is a new way of cross-modal feature matching, the dynamic cross-modal filter (DxMF) that ensures the whole method dynamically uses more suitable 2D-3D complementarity. Evaluation of the Mx2M on three DA scenarios, including Day/Night, USA/Singapore, and A2D2/SemanticKITTI, brings large improvements over previous methods on many metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04231v1-abstract-full').style.display = 'none'; document.getElementById('2307.04231v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.11025">arXiv:2306.11025</a> <span> [<a href="https://arxiv.org/pdf/2306.11025">pdf</a>, <a href="https://arxiv.org/ps/2306.11025">ps</a>, <a href="https://arxiv.org/format/2306.11025">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Finance">q-fin.ST</span> </div> </div> <p class="title is-5 mathjax"> Temporal Data Meets LLM -- Explainable Financial Time Series Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xinli Yu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zheng Chen</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuan Ling</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+S">Shujing Dong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zongyi Liu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yanbin Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.11025v1-abstract-short" style="display: inline;"> This paper presents a novel study on harnessing Large Language Models' (LLMs) outstanding knowledge and reasoning abilities for explainable financial time series forecasting. The application of machine learning models to financial time series comes with several challenges, including the difficulty in cross-sequence reasoning and inference, the hurdle of incorporating multi-modal signals from histo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11025v1-abstract-full').style.display = 'inline'; document.getElementById('2306.11025v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.11025v1-abstract-full" style="display: none;"> This paper presents a novel study on harnessing Large Language Models' (LLMs) outstanding knowledge and reasoning abilities for explainable financial time series forecasting. The application of machine learning models to financial time series comes with several challenges, including the difficulty in cross-sequence reasoning and inference, the hurdle of incorporating multi-modal signals from historical news, financial knowledge graphs, etc., and the issue of interpreting and explaining the model results. In this paper, we focus on NASDAQ-100 stocks, making use of publicly accessible historical stock price data, company metadata, and historical economic/financial news. We conduct experiments to illustrate the potential of LLMs in offering a unified solution to the aforementioned challenges. Our experiments include trying zero-shot/few-shot inference with GPT-4 and instruction-based fine-tuning with a public LLM model Open LLaMA. We demonstrate our approach outperforms a few baselines, including the widely applied classic ARMA-GARCH model and a gradient-boosting tree model. Through the performance comparison results and a few examples, we find LLMs can make a well-thought decision by reasoning over information from both textual news and price time series and extracting insights, leveraging cross-sequence information, and utilizing the inherent knowledge embedded within the LLM. Additionally, we show that a publicly available LLM such as Open-LLaMA, after fine-tuning, can comprehend the instruction to generate explainable forecasts and achieve reasonable performance, albeit relatively inferior in comparison to GPT-4. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11025v1-abstract-full').style.display = 'none'; document.getElementById('2306.11025v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.2; I.2.7; I.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14655">arXiv:2305.14655</a> <span> [<a href="https://arxiv.org/pdf/2305.14655">pdf</a>, <a href="https://arxiv.org/format/2305.14655">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning Survival Distribution with Implicit Survival Function </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yu Ling</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+W">Weimin Tan</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+B">Bo Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14655v1-abstract-short" style="display: inline;"> Survival analysis aims at modeling the relationship between covariates and event occurrence with some untracked (censored) samples. In implementation, existing methods model the survival distribution with strong assumptions or in a discrete time space for likelihood estimation with censorship, which leads to weak generalization. In this paper, we propose Implicit Survival Function (ISF) based on I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14655v1-abstract-full').style.display = 'inline'; document.getElementById('2305.14655v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14655v1-abstract-full" style="display: none;"> Survival analysis aims at modeling the relationship between covariates and event occurrence with some untracked (censored) samples. In implementation, existing methods model the survival distribution with strong assumptions or in a discrete time space for likelihood estimation with censorship, which leads to weak generalization. In this paper, we propose Implicit Survival Function (ISF) based on Implicit Neural Representation for survival distribution estimation without strong assumptions,and employ numerical integration to approximate the cumulative distribution function for prediction and optimization. Experimental results show that ISF outperforms the state-of-the-art methods in three public datasets and has robustness to the hyperparameter controlling estimation precision. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14655v1-abstract-full').style.display = 'none'; document.getElementById('2305.14655v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.07223">arXiv:2305.07223</a> <span> [<a href="https://arxiv.org/pdf/2305.07223">pdf</a>, <a href="https://arxiv.org/format/2305.07223">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Transavs: End-To-End Audio-Visual Segmentation With Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuhang Ling</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuxi Li</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Z">Zhenye Gan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiangning Zhang</a>, <a href="/search/cs?searchtype=author&query=Chi%2C+M">Mingmin Chi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yabiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.07223v2-abstract-short" style="display: inline;"> Audio-Visual Segmentation (AVS) is a challenging task, which aims to segment sounding objects in video frames by exploring audio signals. Generally AVS faces two key challenges: (1) Audio signals inherently exhibit a high degree of information density, as sounds produced by multiple objects are entangled within the same audio stream; (2) Objects of the same category tend to produce similar audio s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.07223v2-abstract-full').style.display = 'inline'; document.getElementById('2305.07223v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.07223v2-abstract-full" style="display: none;"> Audio-Visual Segmentation (AVS) is a challenging task, which aims to segment sounding objects in video frames by exploring audio signals. Generally AVS faces two key challenges: (1) Audio signals inherently exhibit a high degree of information density, as sounds produced by multiple objects are entangled within the same audio stream; (2) Objects of the same category tend to produce similar audio signals, making it difficult to distinguish between them and thus leading to unclear segmentation results. Toward this end, we propose TransAVS, the first Transformer-based end-to-end framework for AVS task. Specifically, TransAVS disentangles the audio stream as audio queries, which will interact with images and decode into segmentation masks with full transformer architectures. This scheme not only promotes comprehensive audio-image communication but also explicitly excavates instance cues encapsulated in the scene. Meanwhile, to encourage these audio queries to capture distinctive sounding objects instead of degrading to be homogeneous, we devise two self-supervised loss functions at both query and mask levels, allowing the model to capture distinctive features within similar audio data and achieve more precise segmentation. Our experiments demonstrate that TransAVS achieves state-of-the-art results on the AVSBench dataset, highlighting its effectiveness in bridging the gap between audio and visual modalities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.07223v2-abstract-full').style.display = 'none'; document.getElementById('2305.07223v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.02226">arXiv:2304.02226</a> <span> [<a href="https://arxiv.org/pdf/2304.02226">pdf</a>, <a href="https://arxiv.org/ps/2304.02226">ps</a>, <a href="https://arxiv.org/format/2304.02226">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Maxflow-Based Bounds for Low-Rate Information Propagation over Noisy Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.02226v1-abstract-short" style="display: inline;"> We study error exponents for the problem of low-rate communication over a directed graph, where each edge in the graph represents a noisy communication channel, and there is a single source and destination. We derive maxflow-based achievability and converse bounds on the error exponent that match when there are two messages and all channels satisfy a symmetry condition called pairwise reversibilit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.02226v1-abstract-full').style.display = 'inline'; document.getElementById('2304.02226v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.02226v1-abstract-full" style="display: none;"> We study error exponents for the problem of low-rate communication over a directed graph, where each edge in the graph represents a noisy communication channel, and there is a single source and destination. We derive maxflow-based achievability and converse bounds on the error exponent that match when there are two messages and all channels satisfy a symmetry condition called pairwise reversibility. More generally, we show that the upper and lower bounds match to within a factor of 4. We also show that with three messages there are cases where the maxflow-based error exponent is strictly suboptimal, thus showing that our tightness result cannot be extended beyond two messages without further assumptions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.02226v1-abstract-full').style.display = 'none'; document.getElementById('2304.02226v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.07011">arXiv:2210.07011</a> <span> [<a href="https://arxiv.org/pdf/2210.07011">pdf</a>, <a href="https://arxiv.org/format/2210.07011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Variational Graph Generator for Multi-View Graph Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jianpeng Chen</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yawen Ling</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yazhou Ren</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Shudong Huang</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+X">Xiaorong Pu</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+Z">Zhifeng Hao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+P+S">Philip S. Yu</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lifang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.07011v3-abstract-short" style="display: inline;"> Multi-view graph clustering (MGC) methods are increasingly being studied due to the explosion of multi-view data with graph structural information. The critical point of MGC is to better utilize view-specific and view-common information in features and graphs of multiple views. However, existing works have an inherent limitation that they are unable to concurrently utilize the consensus graph info… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.07011v3-abstract-full').style.display = 'inline'; document.getElementById('2210.07011v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.07011v3-abstract-full" style="display: none;"> Multi-view graph clustering (MGC) methods are increasingly being studied due to the explosion of multi-view data with graph structural information. The critical point of MGC is to better utilize view-specific and view-common information in features and graphs of multiple views. However, existing works have an inherent limitation that they are unable to concurrently utilize the consensus graph information across multiple graphs and the view-specific feature information. To address this issue, we propose Variational Graph Generator for Multi-View Graph Clustering (VGMGC). Specifically, a novel variational graph generator is proposed to extract common information among multiple graphs. This generator infers a reliable variational consensus graph based on a priori assumption over multiple graphs. Then a simple yet effective graph encoder in conjunction with the multi-view clustering objective is presented to learn the desired graph embeddings for clustering, which embeds the inferred view-common graph and view-specific graphs together with features. Finally, theoretical results illustrate the rationality of the VGMGC by analyzing the uncertainty of the inferred consensus graph with the information bottleneck principle.Extensive experiments demonstrate the superior performance of our VGMGC over SOTAs. The source code is publicly available at https://github.com/cjpcool/VGMGC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.07011v3-abstract-full').style.display = 'none'; document.getElementById('2210.07011v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by TNNLS</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.01295">arXiv:2210.01295</a> <span> [<a href="https://arxiv.org/pdf/2210.01295">pdf</a>, <a href="https://arxiv.org/format/2210.01295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Max-Quantile Grouped Infinite-Arm Bandits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lau%2C+I">Ivan Lau</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Shrivastava%2C+M">Mayank Shrivastava</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.01295v3-abstract-short" style="display: inline;"> In this paper, we consider a bandit problem in which there are a number of groups each consisting of infinitely many arms. Whenever a new arm is requested from a given group, its mean reward is drawn from an unknown reservoir distribution (different for each group), and the uncertainty in the arm's mean reward can only be reduced via subsequent pulls of the arm. The goal is to identify the infinit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01295v3-abstract-full').style.display = 'inline'; document.getElementById('2210.01295v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.01295v3-abstract-full" style="display: none;"> In this paper, we consider a bandit problem in which there are a number of groups each consisting of infinitely many arms. Whenever a new arm is requested from a given group, its mean reward is drawn from an unknown reservoir distribution (different for each group), and the uncertainty in the arm's mean reward can only be reduced via subsequent pulls of the arm. The goal is to identify the infinite-arm group whose reservoir distribution has the highest $(1-伪)$-quantile (e.g., median if $伪= \frac{1}{2}$), using as few total arm pulls as possible. We introduce a two-step algorithm that first requests a fixed number of arms from each group and then runs a finite-arm grouped max-quantile bandit algorithm. We characterize both the instance-dependent and worst-case regret, and provide a matching lower bound for the latter, while discussing various strengths, weaknesses, algorithmic improvements, and potential lower bounds associated with our instance-dependent upper bounds. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01295v3-abstract-full').style.display = 'none'; document.getElementById('2210.01295v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ALT 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.08924">arXiv:2209.08924</a> <span> [<a href="https://arxiv.org/pdf/2209.08924">pdf</a>, <a href="https://arxiv.org/format/2209.08924">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HVC-Net: Unifying Homography, Visibility, and Confidence Learning for Planar Object Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haoxian Zhang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yonggen Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.08924v1-abstract-short" style="display: inline;"> Robust and accurate planar tracking over a whole video sequence is vitally important for many vision applications. The key to planar object tracking is to find object correspondences, modeled by homography, between the reference image and the tracked image. Existing methods tend to obtain wrong correspondences with changing appearance variations, camera-object relative motions and occlusions. To a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08924v1-abstract-full').style.display = 'inline'; document.getElementById('2209.08924v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.08924v1-abstract-full" style="display: none;"> Robust and accurate planar tracking over a whole video sequence is vitally important for many vision applications. The key to planar object tracking is to find object correspondences, modeled by homography, between the reference image and the tracked image. Existing methods tend to obtain wrong correspondences with changing appearance variations, camera-object relative motions and occlusions. To alleviate this problem, we present a unified convolutional neural network (CNN) model that jointly considers homography, visibility, and confidence. First, we introduce correlation blocks that explicitly account for the local appearance changes and camera-object relative motions as the base of our model. Second, we jointly learn the homography and visibility that links camera-object relative motions with occlusions. Third, we propose a confidence module that actively monitors the estimation quality from the pixel correlation distributions obtained in correlation blocks. All these modules are plugged into a Lucas-Kanade (LK) tracking pipeline to obtain both accurate and robust planar object tracking. Our approach outperforms the state-of-the-art methods on public POT and TMT datasets. Its superior performance is also verified on a real-world application, synthesizing high-quality in-video advertisements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08924v1-abstract-full').style.display = 'none'; document.getElementById('2209.08924v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ECCV 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.09116">arXiv:2208.09116</a> <span> [<a href="https://arxiv.org/pdf/2208.09116">pdf</a>, <a href="https://arxiv.org/format/2208.09116">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Effective, Platform-Independent GUI Testing via Image Embedding and Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shengcheng Yu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xin Li</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuchen Ling</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhenyu Chen</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhendong Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.09116v2-abstract-short" style="display: inline;"> Software applications have been playing an increasingly important role in various aspects of society. In particular, mobile apps and web apps are the most prevalent among all applications and are widely used in various industries as well as in people's daily lives. To help ensure mobile and web app quality, many approaches have been introduced to improve app GUI testing via automated exploration.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09116v2-abstract-full').style.display = 'inline'; document.getElementById('2208.09116v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.09116v2-abstract-full" style="display: none;"> Software applications have been playing an increasingly important role in various aspects of society. In particular, mobile apps and web apps are the most prevalent among all applications and are widely used in various industries as well as in people's daily lives. To help ensure mobile and web app quality, many approaches have been introduced to improve app GUI testing via automated exploration. Despite the extensive effort, existing approaches are still limited in reaching high code coverage, constructing high-quality models, and being generally applicable. Reinforcement learning-based approaches are faced with difficult challenges, including effective app state abstraction, reward function design, etc. Moreover, they heavily depend on the specific execution platforms, thus leading to poor generalizability and being unable to adapt to different platforms. We propose PIRLTest, an effective platform-independent approach for app testing. It utilizes computer vision and reinforcement learning techniques in a novel, synergistic manner for automated testing. It extracts the GUI widgets from GUI pages and characterizes the corresponding GUI layouts, embedding the GUI pages as states. The app GUI state combines the macroscopic perspective and the microscopic perspective, and attaches the critical semantic information from GUI images. This enables PIRLTest to be platform-independent and makes the testing approach generally applicable on different platforms. PIRLTest explores apps with the guidance of a curiosity-driven strategy, which uses a Q-network to estimate the values of specific state-action pairs to encourage more exploration in uncovered pages without platform dependency. The exploration will be assigned with rewards for all actions, which are designed considering both the app GUI states and the concrete widgets, to help the framework explore more uncovered pages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09116v2-abstract-full').style.display = 'none'; document.getElementById('2208.09116v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM Transactions on Software Engineering and Methodology in 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.02003">arXiv:2208.02003</a> <span> [<a href="https://arxiv.org/pdf/2208.02003">pdf</a>, <a href="https://arxiv.org/ps/2208.02003">ps</a>, <a href="https://arxiv.org/format/2208.02003">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Multi-Bit Relaying over a Tandem of Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.02003v3-abstract-short" style="display: inline;"> We study error exponents for the problem of relaying a message over a tandem of two channels sharing the same transition law, in particular moving beyond the 1-bit setting studied in recent related works. Our main results show that the 1-hop and 2-hop exponents coincide in both of the following settings: (i) the number of messages is fixed, and the channel law satisfies a condition called pairwise… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02003v3-abstract-full').style.display = 'inline'; document.getElementById('2208.02003v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.02003v3-abstract-full" style="display: none;"> We study error exponents for the problem of relaying a message over a tandem of two channels sharing the same transition law, in particular moving beyond the 1-bit setting studied in recent related works. Our main results show that the 1-hop and 2-hop exponents coincide in both of the following settings: (i) the number of messages is fixed, and the channel law satisfies a condition called pairwise reversibility, or (ii) the channel is arbitrary, and a zero-rate limit is taken from above. In addition, we provide various extensions of our results that relax the assumptions of pairwise reversibility and/or the two channels having identical transition laws, and we provide an example for which the 2-hop exponent is strictly below the 1-hop exponent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02003v3-abstract-full').style.display = 'none'; document.getElementById('2208.02003v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Information Theory</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.12002">arXiv:2207.12002</a> <span> [<a href="https://arxiv.org/pdf/2207.12002">pdf</a>, <a href="https://arxiv.org/ps/2207.12002">ps</a>, <a href="https://arxiv.org/format/2207.12002">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> An Optimal Motion Planning Framework for Quadruped Jumping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhitao Song</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+L">Linzhu Yue</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guangli Sun</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yihu Ling</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+H">Hongshuo Wei</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+L">Linhai Gui</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yun-Hui Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.12002v1-abstract-short" style="display: inline;"> This paper presents an optimal motion planning framework to generate versatile energy-optimal quadrupedal jumping motions automatically (e.g., flips, spin). The jumping motions via the centroidal dynamics are formulated as a 12-dimensional black-box optimization problem subject to the robot kino-dynamic constraints. Gradient-based approaches offer great success in addressing trajectory optimizatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.12002v1-abstract-full').style.display = 'inline'; document.getElementById('2207.12002v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.12002v1-abstract-full" style="display: none;"> This paper presents an optimal motion planning framework to generate versatile energy-optimal quadrupedal jumping motions automatically (e.g., flips, spin). The jumping motions via the centroidal dynamics are formulated as a 12-dimensional black-box optimization problem subject to the robot kino-dynamic constraints. Gradient-based approaches offer great success in addressing trajectory optimization (TO), yet, prior knowledge (e.g., reference motion, contact schedule) is required and results in sub-optimal solutions. The new proposed framework first employed a heuristics-based optimization method to avoid these problems. Moreover, a prioritization fitness function is created for heuristics-based algorithms in robot ground reaction force (GRF) planning, enhancing convergence and searching performance considerably. Since heuristics-based algorithms often require significant time, motions are planned offline and stored as a pre-motion library. A selector is designed to automatically choose motions with user-specified or perception information as input. The proposed framework has been successfully validated only with a simple continuously tracking PD controller in an open-source Mini-Cheetah by several challenging jumping motions, including jumping over a window-shaped obstacle with 30 cm height and left-flipping over a rectangle obstacle with 27 cm height. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.12002v1-abstract-full').style.display = 'none'; document.getElementById('2207.12002v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accept by IROS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.03943">arXiv:2205.03943</a> <span> [<a href="https://arxiv.org/pdf/2205.03943">pdf</a>, <a href="https://arxiv.org/format/2205.03943">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3528233.3530728">10.1145/3528233.3530728 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3528233.3530728">10.1145/3528233.3530728 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3528233.3530728">10.1145/3528233.3530728 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Learning to Brachiate via Simplified Model Imitation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reda%2C+D">Daniele Reda</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+H+Y">Hung Yu Ling</a>, <a href="/search/cs?searchtype=author&query=van+de+Panne%2C+M">Michiel van de Panne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.03943v1-abstract-short" style="display: inline;"> Brachiation is the primary form of locomotion for gibbons and siamangs, in which these primates swing from tree limb to tree limb using only their arms. It is challenging to control because of the limited control authority, the required advance planning, and the precision of the required grasps. We present a novel approach to this problem using reinforcement learning, and as demonstrated on a fing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03943v1-abstract-full').style.display = 'inline'; document.getElementById('2205.03943v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.03943v1-abstract-full" style="display: none;"> Brachiation is the primary form of locomotion for gibbons and siamangs, in which these primates swing from tree limb to tree limb using only their arms. It is challenging to control because of the limited control authority, the required advance planning, and the precision of the required grasps. We present a novel approach to this problem using reinforcement learning, and as demonstrated on a finger-less 14-link planar model that learns to brachiate across challenging handhold sequences. Key to our method is the use of a simplified model, a point mass with a virtual arm, for which we first learn a policy that can brachiate across handhold sequences with a prescribed order. This facilitates the learning of the policy for the full model, for which it provides guidance by providing an overall center-of-mass trajectory to imitate, as well as for the timing of the holds. Lastly, the simplified model can also readily be used for planning suitable sequences of handholds in a given environment. Our results demonstrate brachiation motions with a variety of durations for the flight and hold phases, as well as emergent extra back-and-forth swings when this proves useful. The system is evaluated with a variety of ablations. The method enables future work towards more general 3D brachiation, as well as using simplified model imitation in other settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.03943v1-abstract-full').style.display = 'none'; document.getElementById('2205.03943v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 6 figures. Accepted at SIGGRAPH 2022. For videos, supplementary material and code, visit the following URL https://brachiation-rl.github.io/brachiation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.11769">arXiv:2204.11769</a> <span> [<a href="https://arxiv.org/pdf/2204.11769">pdf</a>, <a href="https://arxiv.org/ps/2204.11769">ps</a>, <a href="https://arxiv.org/format/2204.11769">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-scale reconstruction of undersampled spectral-spatial OCT data for coronary imaging using deep learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xueshen Li</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+S">Shengting Cao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongshan Liu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinwen Yao</a>, <a href="/search/cs?searchtype=author&query=Brott%2C+B+C">Brigitta C. Brott</a>, <a href="/search/cs?searchtype=author&query=Litovsky%2C+S+H">Silvio H. Litovsky</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xiaoyu Song</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuye Ling</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yu Gan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.11769v1-abstract-short" style="display: inline;"> Coronary artery disease (CAD) is a cardiovascular condition with high morbidity and mortality. Intravascular optical coherence tomography (IVOCT) has been considered as an optimal imagining system for the diagnosis and treatment of CAD. Constrained by Nyquist theorem, dense sampling in IVOCT attains high resolving power to delineate cellular structures/ features. There is a trade-off between high… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.11769v1-abstract-full').style.display = 'inline'; document.getElementById('2204.11769v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.11769v1-abstract-full" style="display: none;"> Coronary artery disease (CAD) is a cardiovascular condition with high morbidity and mortality. Intravascular optical coherence tomography (IVOCT) has been considered as an optimal imagining system for the diagnosis and treatment of CAD. Constrained by Nyquist theorem, dense sampling in IVOCT attains high resolving power to delineate cellular structures/ features. There is a trade-off between high spatial resolution and fast scanning rate for coronary imaging. In this paper, we propose a viable spectral-spatial acquisition method that down-scales the sampling process in both spectral and spatial domain while maintaining high quality in image reconstruction. The down-scaling schedule boosts data acquisition speed without any hardware modifications. Additionally, we propose a unified multi-scale reconstruction framework, namely Multiscale- Spectral-Spatial-Magnification Network (MSSMN), to resolve highly down-scaled (compressed) OCT images with flexible magnification factors. We incorporate the proposed methods into Spectral Domain OCT (SD-OCT) imaging of human coronary samples with clinical features such as stent and calcified lesions. Our experimental results demonstrate that spectral-spatial downscaled data can be better reconstructed than data that is downscaled solely in either spectral or spatial domain. Moreover, we observe better reconstruction performance using MSSMN than using existing reconstruction methods. Our acquisition method and multi-scale reconstruction framework, in combination, may allow faster SD-OCT inspection with high resolution during coronary intervention. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.11769v1-abstract-full').style.display = 'none'; document.getElementById('2204.11769v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures, reviewed by IEEE trans BME</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07955">arXiv:2204.07955</a> <span> [<a href="https://arxiv.org/pdf/2204.07955">pdf</a>, <a href="https://arxiv.org/format/2204.07955">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Vision-Language Pre-Training for Multimodal Aspect-Based Sentiment Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yan Ling</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jianfei Yu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+R">Rui Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07955v2-abstract-short" style="display: inline;"> As an important task in sentiment analysis, Multimodal Aspect-Based Sentiment Analysis (MABSA) has attracted increasing attention in recent years. However, previous approaches either (i) use separately pre-trained visual and textual models, which ignore the crossmodal alignment or (ii) use vision-language models pre-trained with general pre-training tasks, which are inadequate to identify finegrai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07955v2-abstract-full').style.display = 'inline'; document.getElementById('2204.07955v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07955v2-abstract-full" style="display: none;"> As an important task in sentiment analysis, Multimodal Aspect-Based Sentiment Analysis (MABSA) has attracted increasing attention in recent years. However, previous approaches either (i) use separately pre-trained visual and textual models, which ignore the crossmodal alignment or (ii) use vision-language models pre-trained with general pre-training tasks, which are inadequate to identify finegrained aspects, opinions, and their alignments across modalities. To tackle these limitations, we propose a task-specific Vision-Language Pre-training framework for MABSA (VLPMABSA), which is a unified multimodal encoder-decoder architecture for all the pretraining and downstream tasks. We further design three types of task-specific pre-training tasks from the language, vision, and multimodal modalities, respectively. Experimental results show that our approach generally outperforms the state-of-the-art approaches on three MABSA subtasks. Further analysis demonstrates the effectiveness of each pretraining task. The source code is publicly released at https://github.com/NUSTM/VLP-MABSA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07955v2-abstract-full').style.display = 'none'; document.getElementById('2204.07955v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACL 2022 (long paper)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.01675">arXiv:2203.01675</a> <span> [<a href="https://arxiv.org/pdf/2203.01675">pdf</a>, <a href="https://arxiv.org/format/2203.01675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cross-Modality Earth Mover's Distance for Visible Thermal Person Re-Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yongguo Ling</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+Z">Zhun Zhong</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+D">Donglin Cao</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Z">Zhiming Luo</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yaojin Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shaozi Li</a>, <a href="/search/cs?searchtype=author&query=Sebe%2C+N">Nicu Sebe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.01675v1-abstract-short" style="display: inline;"> Visible thermal person re-identification (VT-ReID) suffers from the inter-modality discrepancy and intra-identity variations. Distribution alignment is a popular solution for VT-ReID, which, however, is usually restricted to the influence of the intra-identity variations. In this paper, we propose the Cross-Modality Earth Mover's Distance (CM-EMD) that can alleviate the impact of the intra-identit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.01675v1-abstract-full').style.display = 'inline'; document.getElementById('2203.01675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.01675v1-abstract-full" style="display: none;"> Visible thermal person re-identification (VT-ReID) suffers from the inter-modality discrepancy and intra-identity variations. Distribution alignment is a popular solution for VT-ReID, which, however, is usually restricted to the influence of the intra-identity variations. In this paper, we propose the Cross-Modality Earth Mover's Distance (CM-EMD) that can alleviate the impact of the intra-identity variations during modality alignment. CM-EMD selects an optimal transport strategy and assigns high weights to pairs that have a smaller intra-identity variation. In this manner, the model will focus on reducing the inter-modality discrepancy while paying less attention to intra-identity variations, leading to a more effective modality alignment. Moreover, we introduce two techniques to improve the advantage of CM-EMD. First, the Cross-Modality Discrimination Learning (CM-DL) is designed to overcome the discrimination degradation problem caused by modality alignment. By reducing the ratio between intra-identity and inter-identity variances, CM-DL leads the model to learn more discriminative representations. Second, we construct the Multi-Granularity Structure (MGS), enabling us to align modalities from both coarse- and fine-grained levels with the proposed CM-EMD. Extensive experiments show the benefits of the proposed CM-EMD and its auxiliary techniques (CM-DL and MGS). Our method achieves state-of-the-art performance on two VT-ReID benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.01675v1-abstract-full').style.display = 'none'; document.getElementById('2203.01675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.11377">arXiv:2202.11377</a> <span> [<a href="https://arxiv.org/pdf/2202.11377">pdf</a>, <a href="https://arxiv.org/format/2202.11377">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Multi-scale Sparse Representation-Based Shadow Inpainting for Retinal OCT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yaoqi Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yufan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongshan Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiaxuan Li</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+P">Peiyao Jin</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yu Gan</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuye Ling</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yikai Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.11377v1-abstract-short" style="display: inline;"> Inpainting shadowed regions cast by superficial blood vessels in retinal optical coherence tomography (OCT) images is critical for accurate and robust machine analysis and clinical diagnosis. Traditional sequence-based approaches such as propagating neighboring information to gradually fill in the missing regions are cost-effective. But they generate less satisfactory outcomes when dealing with la… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.11377v1-abstract-full').style.display = 'inline'; document.getElementById('2202.11377v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.11377v1-abstract-full" style="display: none;"> Inpainting shadowed regions cast by superficial blood vessels in retinal optical coherence tomography (OCT) images is critical for accurate and robust machine analysis and clinical diagnosis. Traditional sequence-based approaches such as propagating neighboring information to gradually fill in the missing regions are cost-effective. But they generate less satisfactory outcomes when dealing with larger missing regions and texture-rich structures. Emerging deep learning-based methods such as encoder-decoder networks have shown promising results in natural image inpainting tasks. However, they typically need a long computational time for network training in addition to the high demand on the size of datasets, which makes it difficult to be applied on often small medical datasets. To address these challenges, we propose a novel multi-scale shadow inpainting framework for OCT images by synergically applying sparse representation and deep learning: sparse representation is used to extract features from a small amount of training images for further inpainting and to regularize the image after the multi-scale image fusion, while convolutional neural network (CNN) is employed to enhance the image quality. During the image inpainting, we divide preprocessed input images into different branches based on the shadow width to harvest complementary information from different scales. Finally, a sparse representation-based regularizing module is designed to refine the generated contents after multi-scale feature aggregation. Experiments are conducted to compare our proposal versus both traditional and deep learning-based techniques on synthetic and real-world shadows. Results demonstrate that our proposed method achieves favorable image inpainting in terms of visual quality and quantitative metrics, especially when wide shadows are presented. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.11377v1-abstract-full').style.display = 'none'; document.getElementById('2202.11377v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.14375">arXiv:2112.14375</a> <span> [<a href="https://arxiv.org/pdf/2112.14375">pdf</a>, <a href="https://arxiv.org/ps/2112.14375">ps</a>, <a href="https://arxiv.org/format/2112.14375">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Variational Learning for the Inverted Beta-Liouville Mixture Model and Its Application to Text Categorization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yongfa Ling</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+W">Wenbo Guan</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+Q">Qiang Ruan</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Heping Song</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+Y">Yuping Lai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.14375v1-abstract-short" style="display: inline;"> The finite invert Beta-Liouville mixture model (IBLMM) has recently gained some attention due to its positive data modeling capability. Under the conventional variational inference (VI) framework, the analytically tractable solution to the optimization of the variational posterior distribution cannot be obtained, since the variational object function involves evaluation of intractable moments. Wit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14375v1-abstract-full').style.display = 'inline'; document.getElementById('2112.14375v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.14375v1-abstract-full" style="display: none;"> The finite invert Beta-Liouville mixture model (IBLMM) has recently gained some attention due to its positive data modeling capability. Under the conventional variational inference (VI) framework, the analytically tractable solution to the optimization of the variational posterior distribution cannot be obtained, since the variational object function involves evaluation of intractable moments. With the recently proposed extended variational inference (EVI) framework, a new function is proposed to replace the original variational object function in order to avoid intractable moment computation, so that the analytically tractable solution of the IBLMM can be derived in an elegant way. The good performance of the proposed approach is demonstrated by experiments with both synthesized data and a real-world application namely text categorization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14375v1-abstract-full').style.display = 'none'; document.getElementById('2112.14375v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.07120">arXiv:2112.07120</a> <span> [<a href="https://arxiv.org/pdf/2112.07120">pdf</a>, <a href="https://arxiv.org/format/2112.07120">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Simple Coding Techniques for Many-Hop Relaying </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.07120v4-abstract-short" style="display: inline;"> In this paper, we study the problem of relaying a single bit of information across a series of binary symmetric channels, and the associated trade-off between the number of hops $m$, the transmission time $n$, and the error probability. We introduce a simple, efficient, and deterministic protocol that attains positive information velocity (i.e., a non-vanishing ratio $\frac{m}{n}$ and small error… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.07120v4-abstract-full').style.display = 'inline'; document.getElementById('2112.07120v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.07120v4-abstract-full" style="display: none;"> In this paper, we study the problem of relaying a single bit of information across a series of binary symmetric channels, and the associated trade-off between the number of hops $m$, the transmission time $n$, and the error probability. We introduce a simple, efficient, and deterministic protocol that attains positive information velocity (i.e., a non-vanishing ratio $\frac{m}{n}$ and small error probability) and is significantly simpler than existing protocols that do so. In addition, we characterize the optimal low-noise and high-noise scaling laws of the information velocity, and we adapt our 1-bit protocol to transmit $k$ bits over $m$ hops with $O(m+k)$ transmission time. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.07120v4-abstract-full').style.display = 'none'; document.getElementById('2112.07120v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Information Theory, Volume 68, Issue 11, pp. 7043-7053, Nov. 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.03301">arXiv:2111.03301</a> <span> [<a href="https://arxiv.org/pdf/2111.03301">pdf</a>, <a href="https://arxiv.org/format/2111.03301">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Frequency-Aware Physics-Inspired Degradation Model for Real-World Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zhenxing Dong</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+H">Hong Cao</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+W">Wang Shen</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Y">Yu Gan</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yuye Ling</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+G">Guangtao Zhai</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yikai Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.03301v2-abstract-short" style="display: inline;"> Current learning-based single image super-resolution (SISR) algorithms underperform on real data due to the deviation in the assumed degrada-tion process from that in the real-world scenario. Conventional degradation processes consider applying blur, noise, and downsampling (typicallybicubic downsampling) on high-resolution (HR) images to synthesize low-resolution (LR) counterparts. However, few w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.03301v2-abstract-full').style.display = 'inline'; document.getElementById('2111.03301v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.03301v2-abstract-full" style="display: none;"> Current learning-based single image super-resolution (SISR) algorithms underperform on real data due to the deviation in the assumed degrada-tion process from that in the real-world scenario. Conventional degradation processes consider applying blur, noise, and downsampling (typicallybicubic downsampling) on high-resolution (HR) images to synthesize low-resolution (LR) counterparts. However, few works on degradation modelling have taken the physical aspects of the optical imaging system intoconsideration. In this paper, we analyze the imaging system optically andexploit the characteristics of the real-world LR-HR pairs in the spatial frequency domain. We formulate a real-world physics-inspired degradationmodel by considering bothopticsandsensordegradation; The physical degradation of an imaging system is modelled as a low-pass filter, whose cut-off frequency is dictated by the object distance, the focal length of thelens, and the pixel size of the image sensor. In particular, we propose to use a convolutional neural network (CNN) to learn the cutoff frequency of real-world degradation process. The learned network is then applied to synthesize LR images from unpaired HR images. The synthetic HR-LR image pairs are later used to train an SISR network. We evaluatethe effectiveness and generalization capability of the proposed degradation model on real-world images captured by different imaging systems. Experimental results showcase that the SISR network trained by using our synthetic data performs favorably against the network using the traditional degradation model. Moreover, our results are comparable to that obtained by the same network trained by using real-world LR-HR pairs, which are challenging to obtain in real scenes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.03301v2-abstract-full').style.display = 'none'; document.getElementById('2111.03301v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages,12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.00896">arXiv:2110.00896</a> <span> [<a href="https://arxiv.org/pdf/2110.00896">pdf</a>, <a href="https://arxiv.org/format/2110.00896">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Disarranged Zone Learning (DZL): An unsupervised and dynamic automatic stenosis recognition methodology based on coronary angiography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dai%2C+Y">Yanan Dai</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+P">Pengxiong Zhu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+B">Bangde Xue</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yun Ling</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xibao Shi</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+L">Liang Geng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jun Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.00896v1-abstract-short" style="display: inline;"> We proposed a novel unsupervised methodology named Disarranged Zone Learning (DZL) to automatically recognize stenosis in coronary angiography. The methodology firstly disarranges the frames in a video, secondly it generates an effective zone and lastly trains an encoder-decoder GRU model to learn the capability to recover disarranged frames. The breakthrough of our study is to discover and valida… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.00896v1-abstract-full').style.display = 'inline'; document.getElementById('2110.00896v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.00896v1-abstract-full" style="display: none;"> We proposed a novel unsupervised methodology named Disarranged Zone Learning (DZL) to automatically recognize stenosis in coronary angiography. The methodology firstly disarranges the frames in a video, secondly it generates an effective zone and lastly trains an encoder-decoder GRU model to learn the capability to recover disarranged frames. The breakthrough of our study is to discover and validate the Sequence Intensity (Recover Difficulty) is a measure of Coronary Artery Stenosis Status. Hence, the prediction accuracy of DZL is used as an approximator of coronary stenosis indicator. DZL is an unsupervised methodology and no label engineering effort is needed, the sub GRU model in DZL works as a self-supervised approach. So DZL could theoretically utilize infinitely huge amounts of coronary angiographies to learn and improve performance without laborious data labeling. There is no data preprocessing precondition to run DZL as it dynamically utilizes the whole video, hence it is easy to be implemented and generalized to overcome the data heterogeneity of coronary angiography. The overall average precision score achieves 0.93, AUC achieves 0.8 for this pure methodology. The highest segmented average precision score is 0.98 and the highest segmented AUC is 0.87 for coronary occlusion indicator. Finally, we developed a software demo to implement DZL methodology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.00896v1-abstract-full').style.display = 'none'; document.getElementById('2110.00896v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.12769">arXiv:2109.12769</a> <span> [<a href="https://arxiv.org/pdf/2109.12769">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Heterogeneous Treatment Effect Estimation using machine learning for Healthcare application: tutorial and benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yaobin Ling</a>, <a href="/search/cs?searchtype=author&query=Upadhyaya%2C+P">Pulakesh Upadhyaya</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Luyao Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xiaoqian Jiang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+Y">Yejin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.12769v5-abstract-short" style="display: inline;"> Developing new drugs for target diseases is a time-consuming and expensive task, drug repurposing has become a popular topic in the drug development field. As much health claim data become available, many studies have been conducted on the data. The real-world data is noisy, sparse, and has many confounding factors. In addition, many studies have shown that drugs effects are heterogeneous among th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.12769v5-abstract-full').style.display = 'inline'; document.getElementById('2109.12769v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.12769v5-abstract-full" style="display: none;"> Developing new drugs for target diseases is a time-consuming and expensive task, drug repurposing has become a popular topic in the drug development field. As much health claim data become available, many studies have been conducted on the data. The real-world data is noisy, sparse, and has many confounding factors. In addition, many studies have shown that drugs effects are heterogeneous among the population. Lots of advanced machine learning models about estimating heterogeneous treatment effects (HTE) have emerged in recent years, and have been applied to in econometrics and machine learning communities. These studies acknowledge medicine and drug development as the main application area, but there has been limited translational research from the HTE methodology to drug development. We aim to introduce the HTE methodology to the healthcare area and provide feasibility consideration when translating the methodology with benchmark experiments on healthcare administrative claim data. Also, we want to use benchmark experiments to show how to interpret and evaluate the model when it is applied to healthcare research. By introducing the recent HTE techniques to a broad readership in biomedical informatics communities, we expect to promote the wide adoption of causal inference using machine learning. We also expect to provide the feasibility of HTE for personalized drug effectiveness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.12769v5-abstract-full').style.display = 'none'; document.getElementById('2109.12769v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">52 pages, 8 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Biomedical Informatics (2022): 104256 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.11765">arXiv:2109.11765</a> <span> [<a href="https://arxiv.org/pdf/2109.11765">pdf</a>, <a href="https://arxiv.org/format/2109.11765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Dimension Reduction for Data with Heterogeneous Missingness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yurong Ling</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zijing Liu</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+J">Jing-Hao Xue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.11765v2-abstract-short" style="display: inline;"> Dimension reduction plays a pivotal role in analysing high-dimensional data. However, observations with missing values present serious difficulties in directly applying standard dimension reduction techniques. As a large number of dimension reduction approaches are based on the Gram matrix, we first investigate the effects of missingness on dimension reduction by studying the statistical propertie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.11765v2-abstract-full').style.display = 'inline'; document.getElementById('2109.11765v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.11765v2-abstract-full" style="display: none;"> Dimension reduction plays a pivotal role in analysing high-dimensional data. However, observations with missing values present serious difficulties in directly applying standard dimension reduction techniques. As a large number of dimension reduction approaches are based on the Gram matrix, we first investigate the effects of missingness on dimension reduction by studying the statistical properties of the Gram matrix with or without missingness, and then we present a bias-corrected Gram matrix with nice statistical properties under heterogeneous missingness. Extensive empirical results, on both simulated and publicly available real datasets, show that the proposed unbiased Gram matrix can significantly improve a broad spectrum of representative dimension reduction approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.11765v2-abstract-full').style.display = 'none'; document.getElementById('2109.11765v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for the 37th Conference on Uncertainty in Artificial Intelligence (UAI 2021)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.06565">arXiv:2104.06565</a> <span> [<a href="https://arxiv.org/pdf/2104.06565">pdf</a>, <a href="https://arxiv.org/format/2104.06565">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Optimal Rates of Teaching and Learning Under Uncertainty </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+Y+H">Yan Hao Ling</a>, <a href="/search/cs?searchtype=author&query=Scarlett%2C+J">Jonathan Scarlett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.06565v2-abstract-short" style="display: inline;"> In this paper, we consider a recently-proposed model of teaching and learning under uncertainty, in which a teacher receives independent observations of a single bit corrupted by binary symmetric noise, and sequentially transmits to a student through another binary symmetric channel based on the bits observed so far. After a given number $n$ of transmissions, the student outputs an estimate of the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.06565v2-abstract-full').style.display = 'inline'; document.getElementById('2104.06565v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.06565v2-abstract-full" style="display: none;"> In this paper, we consider a recently-proposed model of teaching and learning under uncertainty, in which a teacher receives independent observations of a single bit corrupted by binary symmetric noise, and sequentially transmits to a student through another binary symmetric channel based on the bits observed so far. After a given number $n$ of transmissions, the student outputs an estimate of the unknown bit, and we are interested in the exponential decay rate of the error probability as $n$ increases. We propose a novel block-structured teaching strategy in which the teacher encodes the number of 1s received in each block, and show that the resulting error exponent is the binary relative entropy $D\big(\frac{1}{2}\|\max(p,q)\big)$, where $p$ and $q$ are the noise parameters. This matches a trivial converse result based on the data processing inequality, and settles two conjectures of [Jog and Loh, 2021] and [Huleihel, Polyanskiy, and Shayevitz, 2019]. In addition, we show that the computation time required by the teacher and student is linear in $n$. We also study a more general setting in which the binary symmetric channels are replaced by general binary-input discrete memoryless channels. We provide an achievability bound and a converse bound, and show that the two coincide in certain cases, including (i) when the two channels are identical, and (ii) when the student-teacher channel is a binary symmetric channel. More generally, we give sufficient conditions under which our learning rate is the best possible for block-structured protocols. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.06565v2-abstract-full').style.display = 'none'; document.getElementById('2104.06565v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Information Theory, Volume 67, Issue 11, pp. 7067-7080, Nov. 2021. This version slightly modifies/expands the 'Existing Results' section</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.14274">arXiv:2103.14274</a> <span> [<a href="https://arxiv.org/pdf/2103.14274">pdf</a>, <a href="https://arxiv.org/format/2103.14274">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3386569.3392422">10.1145/3386569.3392422 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Character Controllers Using Motion VAEs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+H+Y">Hung Yu Ling</a>, <a href="/search/cs?searchtype=author&query=Zinno%2C+F">Fabio Zinno</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+G">George Cheng</a>, <a href="/search/cs?searchtype=author&query=van+de+Panne%2C+M">Michiel van de Panne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.14274v1-abstract-short" style="display: inline;"> A fundamental problem in computer animation is that of realizing purposeful and realistic human movement given a sufficiently-rich set of motion capture clips. We learn data-driven generative models of human movement using autoregressive conditional variational autoencoders, or Motion VAEs. The latent variables of the learned autoencoder define the action space for the movement and thereby govern… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.14274v1-abstract-full').style.display = 'inline'; document.getElementById('2103.14274v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.14274v1-abstract-full" style="display: none;"> A fundamental problem in computer animation is that of realizing purposeful and realistic human movement given a sufficiently-rich set of motion capture clips. We learn data-driven generative models of human movement using autoregressive conditional variational autoencoders, or Motion VAEs. The latent variables of the learned autoencoder define the action space for the movement and thereby govern its evolution over time. Planning or control algorithms can then use this action space to generate desired motions. In particular, we use deep reinforcement learning to learn controllers that achieve goal-directed movements. We demonstrate the effectiveness of the approach on multiple tasks. We further evaluate system-design choices and describe the current limitations of Motion VAEs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.14274v1-abstract-full').style.display = 'none'; document.getElementById('2103.14274v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://www.cs.ubc.ca/~hyuling/projects/mvae/ ; Code: https://github.com/electronicarts/character-motion-vaes</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.13584">arXiv:2103.13584</a> <span> [<a href="https://arxiv.org/pdf/2103.13584">pdf</a>, <a href="https://arxiv.org/format/2103.13584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BERT4SO: Neural Sentence Ordering by Fine-tuning BERT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yutao Zhu</a>, <a href="/search/cs?searchtype=author&query=Nie%2C+J">Jian-Yun Nie</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+K">Kun Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shengchao Liu</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Y">Yabo Ling</a>, <a href="/search/cs?searchtype=author&query=Du%2C+P">Pan Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.13584v3-abstract-short" style="display: inline;"> Sentence ordering aims to arrange the sentences of a given text in the correct order. Recent work frames it as a ranking problem and applies deep neural networks to it. In this work, we propose a new method, named BERT4SO, by fine-tuning BERT for sentence ordering. We concatenate all sentences and compute their representations by using multiple special tokens and carefully designed segment (interv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.13584v3-abstract-full').style.display = 'inline'; document.getElementById('2103.13584v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.13584v3-abstract-full" style="display: none;"> Sentence ordering aims to arrange the sentences of a given text in the correct order. Recent work frames it as a ranking problem and applies deep neural networks to it. In this work, we propose a new method, named BERT4SO, by fine-tuning BERT for sentence ordering. We concatenate all sentences and compute their representations by using multiple special tokens and carefully designed segment (interval) embeddings. The tokens across multiple sentences can attend to each other which greatly enhances their interactions. We also propose a margin-based listwise ranking loss based on ListMLE to facilitate the optimization process. Experimental results on five benchmark datasets demonstrate the effectiveness of our proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.13584v3-abstract-full').style.display = 'none'; document.getElementById('2103.13584v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Ling%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Ling%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Ling%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository