Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–34 of 34 results for author: <span class="mathjax">Zia, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Zia%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Zia, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zia%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zia, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16700">arXiv:2501.16700</a> <span> [<a href="https://arxiv.org/pdf/2501.16700">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Determining Mosaic Resilience in Sugarcane Plants using Hyperspectral Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zia%2C+A">Ali Zia</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/cs?searchtype=author&query=Olayemi%2C+M">Muyiwa Olayemi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16700v1-abstract-short" style="display: inline;"> Sugarcane mosaic disease poses a serious threat to the Australian sugarcane industry, leading to yield losses of up to 30% in susceptible varieties. Existing manual inspection methods for detecting mosaic resilience are inefficient and impractical for large-scale application. This study introduces a novel approach using hyperspectral imaging and machine learning to detect mosaic resilience by leve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16700v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16700v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16700v1-abstract-full" style="display: none;"> Sugarcane mosaic disease poses a serious threat to the Australian sugarcane industry, leading to yield losses of up to 30% in susceptible varieties. Existing manual inspection methods for detecting mosaic resilience are inefficient and impractical for large-scale application. This study introduces a novel approach using hyperspectral imaging and machine learning to detect mosaic resilience by leveraging global feature representation from local spectral patches. Hyperspectral data were collected from eight sugarcane varieties under controlled and field conditions. Local spectral patches were analyzed to capture spatial and spectral variations, which were then aggregated into global feature representations using a ResNet18 deep learning architecture. 
   While classical methods like Support Vector Machines struggled to utilize spatial-spectral relationships effectively, the deep learning model achieved high classification accuracy, demonstrating its capacity to identify mosaic resilience from fine-grained hyperspectral data. This approach enhances early detection capabilities, enabling more efficient management of susceptible strains and contributing to sustainable sugarcane production.
   Submitted 27 January, 2025; originally announced January 2025.
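
The patch-to-global pipeline described in this abstract can be pictured with a short sketch. The code below is a minimal, hypothetical illustration (not the authors' released code): it treats each hyperspectral patch as a multi-channel image, adapts a torchvision ResNet18 stem to an assumed number of spectral bands, and mean-pools patch embeddings into a global feature before classification. Band count, patch size, and class count are placeholders.

```python
# Hypothetical sketch of "local spectral patches -> global feature -> classifier".
# Assumptions: 128 spectral bands, 16x16-pixel patches, binary resilient/susceptible labels.
import torch
import torch.nn as nn
from torchvision.models import resnet18

class PatchToGlobalClassifier(nn.Module):
    def __init__(self, n_bands: int = 128, n_classes: int = 2):
        super().__init__()
        backbone = resnet18(weights=None)
        # Replace the RGB stem so the network accepts n_bands input channels.
        backbone.conv1 = nn.Conv2d(n_bands, 64, kernel_size=7, stride=2, padding=3, bias=False)
        backbone.fc = nn.Identity()          # keep the 512-d embedding
        self.backbone = backbone
        self.head = nn.Linear(512, n_classes)

    def forward(self, patches: torch.Tensor) -> torch.Tensor:
        # patches: (batch, n_patches, n_bands, H, W)
        b, p, c, h, w = patches.shape
        feats = self.backbone(patches.reshape(b * p, c, h, w))   # (b*p, 512)
        global_feat = feats.reshape(b, p, -1).mean(dim=1)        # aggregate local patches
        return self.head(global_feat)

if __name__ == "__main__":
    model = PatchToGlobalClassifier()
    dummy = torch.randn(2, 9, 128, 16, 16)   # 2 plants, 9 patches each
    print(model(dummy).shape)                # torch.Size([2, 2])
```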

2. arXiv:2501.09209 [pdf, other] (cs.CV)
   Surgical Visual Understanding (SurgVU) Dataset
   Authors: Aneeq Zia, Max Berniker, Rogerio Nespolo, Conor Perreault, Ziheng Wang, Benjamin Mueller, Ryan Schmidt, Kiran Bhattacharyya, Xi Liu, Anthony Jarc
   Abstract: Owing to recent advances in machine learning and the ability to harvest large amounts of data during robotic-assisted surgeries, surgical data science is ripe for foundational work. We present a large dataset of surgical videos and their accompanying labels for this purpose. We describe how the data were collected and some of their unique attributes. Multiple example problems are outlined. Although the dataset was curated for a particular set of scientific challenges (in an accompanying paper), it is general enough to be used for a broad range of machine learning questions. Our hope is that this dataset exposes the larger machine learning community to the challenging problems within surgical data science, and becomes a touchstone for future research. The videos are available at https://storage.googleapis.com/isi-surgvu/surgvu24_videos_only.zip, the labels at https://storage.googleapis.com/isi-surgvu/surgvu24_labels_updated_v2.zip, and a validation set for the tool detection problem at https://storage.googleapis.com/isi-surgvu/cat1_test_set_public.zip.
   Submitted 15 January, 2025; originally announced January 2025.
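
Since the abstract gives direct download URLs, a short hedged sketch of fetching and unpacking one archive may be useful. The URLs are taken verbatim from the abstract; the local paths are arbitrary placeholders.

```python
# Minimal sketch: download and unpack the SurgVU label archive.
# The URL comes from the abstract above; destination paths are placeholders.
import urllib.request
import zipfile
from pathlib import Path

LABELS_URL = "https://storage.googleapis.com/isi-surgvu/surgvu24_labels_updated_v2.zip"
dest = Path("surgvu24_labels.zip")

if not dest.exists():
    urllib.request.urlretrieve(LABELS_URL, dest)       # download the zip

with zipfile.ZipFile(dest) as zf:
    zf.extractall("surgvu24_labels")                   # unpack next to the script

print(sorted(Path("surgvu24_labels").rglob("*"))[:10])  # peek at the first few files
```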

3. arXiv:2411.19005 [pdf, other] (cs.CV)
   Locally-Focused Face Representation for Sketch-to-Image Generation Using Noise-Induced Refinement
   Authors: Muhammad Umer Ramzan, Ali Zia, Abdelwahed Khamis, yman Elgharabawy, Ahmad Liaqat, Usman Ali
   Abstract: This paper presents a novel deep-learning framework that significantly enhances the transformation of rudimentary face sketches into high-fidelity colour images. Employing a Convolutional Block Attention-based Auto-encoder Network (CA2N), our approach effectively captures and enhances critical facial features through a block attention mechanism within an encoder-decoder architecture. Subsequently, the framework utilises a noise-induced conditional Generative Adversarial Network (cGAN) process that allows the system to maintain high performance even on domains unseen during training. These enhancements lead to considerable improvements in image realism and fidelity, with our model achieving superior performance metrics that outperform the best method by FID margins of 17, 23, and 38 on the CelebAMask-HQ, CUHK, and CUFSF datasets, respectively. The model sets a new state of the art in sketch-to-image generation, can generalize across sketch types, and offers a robust solution for applications such as criminal identification in law enforcement.
   Submitted 28 November, 2024; originally announced November 2024.
   Comments: Accepted for publication at the 25th International Conference on Digital Image Computing: Techniques & Applications (DICTA) 2024.
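
The "block attention mechanism within an encoder-decoder architecture" is not specified in detail in this listing. The sketch below is a generic convolutional block-attention module (channel attention followed by spatial attention) of the general kind the CA2N name suggests; treat it as an assumption, not a reproduction of the authors' layer.

```python
# Hedged sketch of a convolutional block-attention module (channel + spatial attention),
# of the general kind suggested by "Convolutional Block Attention-based Auto-encoder".
import torch
import torch.nn as nn

class BlockAttention(nn.Module):
    def __init__(self, channels: int, reduction: int = 8):
        super().__init__()
        # Channel attention: squeeze spatial dims, weigh channels.
        self.channel_mlp = nn.Sequential(
            nn.Linear(channels, channels // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels),
        )
        # Spatial attention: weigh locations from pooled channel statistics.
        self.spatial_conv = nn.Conv2d(2, 1, kernel_size=7, padding=3)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, _, _ = x.shape
        avg = x.mean(dim=(2, 3))                          # (b, c)
        mx = x.amax(dim=(2, 3))                           # (b, c)
        ch = torch.sigmoid(self.channel_mlp(avg) + self.channel_mlp(mx))
        x = x * ch.view(b, c, 1, 1)
        sp = torch.sigmoid(self.spatial_conv(
            torch.cat([x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)], dim=1)))
        return x * sp

if __name__ == "__main__":
    feat = torch.randn(1, 64, 32, 32)
    print(BlockAttention(64)(feat).shape)   # torch.Size([1, 64, 32, 32])
```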

4. arXiv:2410.23901 [pdf, other] (cs.CV)
   NeFF-BioNet: Crop Biomass Prediction from Point Cloud to Drone Imagery
   Authors: Xuesong Li, Zeeshan Hayder, Ali Zia, Connor Cassidy, Shiming Liu, Warwick Stiller, Eric Stone, Warren Conaty, Lars Petersson, Vivien Rolland
   Abstract: Crop biomass offers crucial insights into plant health and yield, making it essential for crop science, farming systems, and agricultural research. However, current measurement methods, which are labor-intensive, destructive, and imprecise, hinder large-scale quantification of this trait. To address this limitation, we present a biomass prediction network (BioNet), designed for adaptation across different data modalities, including point clouds and drone imagery. Our BioNet, utilizing a sparse 3D convolutional neural network (CNN) and a transformer-based prediction module, processes point clouds and other 3D data representations to predict biomass. To further extend BioNet for drone imagery, we integrate a neural feature field (NeFF) module, enabling 3D structure reconstruction and the transformation of 2D semantic features from vision foundation models into the corresponding 3D surfaces. For the point cloud modality, BioNet demonstrates superior performance on two public datasets, with an approximate 6.1% relative improvement (RI) over the state-of-the-art. In the RGB image modality, the combination of BioNet and NeFF achieves a 7.9% RI. Additionally, the NeFF-based approach utilizes inexpensive, portable drone-mounted cameras, providing a scalable solution for large field applications.
   Submitted 30 October, 2024; originally announced October 2024.
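
As a rough illustration of the "sparse 3D CNN plus transformer prediction module" idea, the sketch below voxelizes a point cloud onto a small dense grid (a simplification; the paper uses sparse convolutions), applies a 3D CNN, and feeds the resulting tokens to a transformer encoder before regressing a single biomass value. All layer sizes are invented placeholders.

```python
# Hedged sketch: point cloud -> dense voxel grid -> 3D CNN -> transformer -> biomass value.
# The real BioNet uses sparse 3D convolutions; a dense grid keeps this example short.
import torch
import torch.nn as nn

class ToyBioNet(nn.Module):
    def __init__(self, grid: int = 16, dim: int = 64):
        super().__init__()
        self.grid = grid
        self.cnn = nn.Sequential(
            nn.Conv3d(1, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv3d(32, dim, 3, stride=2, padding=1), nn.ReLU(),
        )
        layer = nn.TransformerEncoderLayer(d_model=dim, nhead=4, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)
        self.head = nn.Linear(dim, 1)

    def voxelize(self, pts: torch.Tensor) -> torch.Tensor:
        # pts: (N, 3) with coordinates already scaled to [0, 1).
        idx = (pts * self.grid).long().clamp(0, self.grid - 1)
        vol = torch.zeros(1, 1, self.grid, self.grid, self.grid)
        vol[0, 0, idx[:, 0], idx[:, 1], idx[:, 2]] = 1.0       # occupancy grid
        return vol

    def forward(self, pts: torch.Tensor) -> torch.Tensor:
        feat = self.cnn(self.voxelize(pts))                    # (1, dim, g/4, g/4, g/4)
        tokens = feat.flatten(2).transpose(1, 2)               # (1, n_voxels, dim)
        return self.head(self.encoder(tokens).mean(dim=1))     # (1, 1) biomass estimate

if __name__ == "__main__":
    cloud = torch.rand(2048, 3)          # synthetic point cloud
    print(ToyBioNet()(cloud).item())
```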
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16208">arXiv:2408.16208</a> <span> [<a href="https://arxiv.org/pdf/2408.16208">pdf</a>, <a href="https://arxiv.org/format/2408.16208">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ReXamine-Global: A Framework for Uncovering Inconsistencies in Radiology Report Generation Metrics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Banerjee%2C+O">Oishi Banerjee</a>, <a href="/search/cs?searchtype=author&query=Saenz%2C+A">Agustina Saenz</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+K">Kay Wu</a>, <a href="/search/cs?searchtype=author&query=Clements%2C+W">Warren Clements</a>, <a href="/search/cs?searchtype=author&query=Zia%2C+A">Adil Zia</a>, <a href="/search/cs?searchtype=author&query=Buensalido%2C+D">Dominic Buensalido</a>, <a href="/search/cs?searchtype=author&query=Kavnoudias%2C+H">Helen Kavnoudias</a>, <a href="/search/cs?searchtype=author&query=Abi-Ghanem%2C+A+S">Alain S. Abi-Ghanem</a>, <a href="/search/cs?searchtype=author&query=Ghawi%2C+N+E">Nour El Ghawi</a>, <a href="/search/cs?searchtype=author&query=Luna%2C+C">Cibele Luna</a>, <a href="/search/cs?searchtype=author&query=Castillo%2C+P">Patricia Castillo</a>, <a href="/search/cs?searchtype=author&query=Al-Surimi%2C+K">Khaled Al-Surimi</a>, <a href="/search/cs?searchtype=author&query=Daghistani%2C+R+A">Rayyan A. Daghistani</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuh-Min Chen</a>, <a href="/search/cs?searchtype=author&query=Chao%2C+H">Heng-sheng Chao</a>, <a href="/search/cs?searchtype=author&query=Heiliger%2C+L">Lars Heiliger</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+M">Moon Kim</a>, <a href="/search/cs?searchtype=author&query=Haubold%2C+J">Johannes Haubold</a>, <a href="/search/cs?searchtype=author&query=Jonske%2C+F">Frederic Jonske</a>, <a href="/search/cs?searchtype=author&query=Rajpurkar%2C+P">Pranav Rajpurkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16208v1-abstract-short" style="display: inline;"> Given the rapidly expanding capabilities of generative AI models for radiology, there is a need for robust metrics that can accurately measure the quality of AI-generated radiology reports across diverse hospitals. We develop ReXamine-Global, a LLM-powered, multi-site framework that tests metrics across different writing styles and patient populations, exposing gaps in their generalization. First,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16208v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16208v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16208v1-abstract-full" style="display: none;"> Given the rapidly expanding capabilities of generative AI models for radiology, there is a need for robust metrics that can accurately measure the quality of AI-generated radiology reports across diverse hospitals. 
   We develop ReXamine-Global, an LLM-powered, multi-site framework that tests metrics across different writing styles and patient populations, exposing gaps in their generalization. First, our method tests whether a metric is undesirably sensitive to reporting style, providing different scores depending on whether AI-generated reports are stylistically similar to ground-truth reports or not. Second, our method measures whether a metric reliably agrees with experts, or whether metric and expert scores of AI-generated report quality diverge for some sites. Using 240 reports from 6 hospitals around the world, we apply ReXamine-Global to 7 established report evaluation metrics and uncover serious gaps in their generalizability. Developers can apply ReXamine-Global when designing new report evaluation metrics, ensuring their robustness across sites. Additionally, our analysis of existing metrics can guide users of those metrics towards evaluation procedures that work reliably at their sites of interest.
   Submitted 28 August, 2024; originally announced August 2024.
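
The second check described above (does a metric agree with experts at every site?) can be pictured with a tiny, hypothetical computation: per-site correlation between metric scores and expert scores, flagging sites where agreement drops. This only illustrates the idea; it is not the ReXamine-Global implementation, and the data are made up.

```python
# Hedged illustration: per-site agreement between an automatic metric and expert scores.
# Data values are synthetic; the real framework uses 240 reports from 6 hospitals.
import numpy as np

rng = np.random.default_rng(0)
sites = {f"hospital_{i}": (rng.normal(size=40), rng.normal(size=40)) for i in range(3)}
# Make one site agree well, to show the flagging behaviour.
sites["hospital_0"] = (sites["hospital_0"][0],
                       sites["hospital_0"][0] + 0.1 * rng.normal(size=40))

for name, (metric_scores, expert_scores) in sites.items():
    r = np.corrcoef(metric_scores, expert_scores)[0, 1]   # Pearson correlation
    flag = "OK" if r > 0.5 else "metric/expert divergence"
    print(f"{name}: r = {r:+.2f}  -> {flag}")
```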

6. arXiv:2408.05029 [pdf, other] (cs.CV)
   Collaborative Static-Dynamic Teaching: A Semi-Supervised Framework for Stripe-Like Space Target Detection
   Authors: Zijian Zhu, Ali Zia, Xuesong Li, Bingbing Dan, Yuebo Ma, Hongfeng Long, Kaili Lu, Enhai Liu, Rujin Zhao
   Abstract: Stripe-like space target detection (SSTD) is crucial for space situational awareness. Traditional unsupervised methods often fail in scenarios with low signal-to-noise ratios and variable stripe-like targets, leading to weak generalization. Although fully supervised learning methods improve model generalization, they require extensive pixel-level labels for training. In the SSTD task, manually creating these labels is often inaccurate and labor-intensive. Semi-supervised learning (SSL) methods reduce the need for these labels and enhance model generalizability, but their performance is limited by pseudo-label quality. To address this, we introduce an innovative Collaborative Static-Dynamic Teacher (CSDT) SSL framework, which includes static and dynamic teacher models as well as a student model. This framework employs a customized adaptive pseudo-labeling (APL) strategy, transitioning from initial static teaching to adaptive collaborative teaching, guiding the student model's training. The exponential moving average (EMA) mechanism further enhances this process by feeding new stripe-like knowledge back to the dynamic teacher model through the student model, creating a positive feedback loop that continuously enhances the quality of pseudo-labels. Moreover, we present MSSA-Net, a novel SSTD network featuring a multi-scale dual-path convolution (MDPC) block and a feature map weighted attention (FMWA) block, designed to extract diverse stripe-like features within the CSDT SSL training framework. Extensive experiments verify the state-of-the-art performance of our framework on AstroStripeSet and on various ground-based and space-based real-world datasets.
   Submitted 9 August, 2024; originally announced August 2024.
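
The EMA feedback loop mentioned above is a standard mean-teacher ingredient. The snippet below shows the usual exponential-moving-average update of teacher weights from student weights, as a hedged sketch rather than the paper's exact CSDT update rule.

```python
# Hedged sketch of an EMA (mean-teacher) weight update, the mechanism the abstract
# describes for feeding student knowledge back into the dynamic teacher.
import copy
import torch
import torch.nn as nn

@torch.no_grad()
def ema_update(teacher: nn.Module, student: nn.Module, decay: float = 0.999) -> None:
    # teacher <- decay * teacher + (1 - decay) * student, parameter by parameter
    for t_param, s_param in zip(teacher.parameters(), student.parameters()):
        t_param.mul_(decay).add_(s_param, alpha=1.0 - decay)

if __name__ == "__main__":
    student = nn.Conv2d(1, 8, 3, padding=1)   # stand-in for a small student network
    teacher = copy.deepcopy(student)          # dynamic teacher starts as a copy
    # ... after each optimizer step on the student:
    ema_update(teacher, student, decay=0.999)
    print(torch.allclose(next(teacher.parameters()), next(student.parameters())))
```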
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18097">arXiv:2407.18097</a> <span> [<a href="https://arxiv.org/pdf/2407.18097">pdf</a>, <a href="https://arxiv.org/format/2407.18097">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SSTD: Stripe-Like Space Target Detection Using Single-Point Weak Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zijian Zhu</a>, <a href="/search/cs?searchtype=author&query=Zia%2C+A">Ali Zia</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuesong Li</a>, <a href="/search/cs?searchtype=author&query=Dan%2C+B">Bingbing Dan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yuebo Ma</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+E">Enhai Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+R">Rujin Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18097v2-abstract-short" style="display: inline;"> Stripe-like space target detection (SSTD) plays a key role in enhancing space situational awareness and assessing spacecraft behaviour. This domain faces three challenges: the lack of publicly available datasets, interference from stray light and stars, and the variability of stripe-like targets, which makes manual labeling both inaccurate and labor-intensive. In response, we introduces `AstroStri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18097v2-abstract-full').style.display = 'inline'; document.getElementById('2407.18097v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18097v2-abstract-full" style="display: none;"> Stripe-like space target detection (SSTD) plays a key role in enhancing space situational awareness and assessing spacecraft behaviour. This domain faces three challenges: the lack of publicly available datasets, interference from stray light and stars, and the variability of stripe-like targets, which makes manual labeling both inaccurate and labor-intensive. In response, we introduces `AstroStripeSet', a pioneering dataset designed for SSTD, aiming to bridge the gap in academic resources and advance research in SSTD. Furthermore, we propose a novel teacher-student label evolution framework with single-point weak supervision, providing a new solution to the challenges of manual labeling. This framework starts with generating initial pseudo-labels using the zero-shot capabilities of the Segment Anything Model (SAM) in a single-point setting. After that, the fine-tuned StripeSAM serves as the teacher and the newly developed StripeNet as the student, consistently improving segmentation performance through label evolution, which iteratively refines these labels. We also introduce `GeoDice', a new loss function customized for the linear characteristics of stripe-like targets. Extensive experiments show that our method matches fully supervised approaches, exhibits strong zero-shot generalization for diverse space-based and ground-based real-world images, and sets a new state-of-the-art (SOTA) benchmark. 
   Our AstroStripeSet dataset and code will be made publicly available.
   Submitted 16 September, 2024; v1 submitted 25 July, 2024; originally announced July 2024.
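
GeoDice itself is not defined in this listing. As background, the sketch below implements the plain soft Dice loss that such a stripe-oriented variant would presumably extend; treat it as a reference point, not as the paper's loss.

```python
# Plain soft Dice loss for binary segmentation, shown as background for the
# stripe-customized "GeoDice" mentioned above (whose exact form is not given here).
import torch

def soft_dice_loss(logits: torch.Tensor, target: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """logits, target: (batch, 1, H, W); target values in {0, 1}."""
    prob = torch.sigmoid(logits)
    inter = (prob * target).sum(dim=(1, 2, 3))
    denom = prob.sum(dim=(1, 2, 3)) + target.sum(dim=(1, 2, 3))
    dice = (2 * inter + eps) / (denom + eps)
    return 1.0 - dice.mean()

if __name__ == "__main__":
    pred = torch.randn(2, 1, 64, 64)
    mask = (torch.rand(2, 1, 64, 64) > 0.95).float()   # sparse masks: stripe targets are thin
    print(float(soft_dice_loss(pred, mask)))
```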

8. arXiv:2404.11256 [pdf, other] (cs.CV)
   MMCBE: Multi-modality Dataset for Crop Biomass Prediction and Beyond
   Authors: Xuesong Li, Zeeshan Hayder, Ali Zia, Connor Cassidy, Shiming Liu, Warwick Stiller, Eric Stone, Warren Conaty, Lars Petersson, Vivien Rolland
   Abstract: Crop biomass, a critical indicator of plant growth, health, and productivity, is invaluable for crop breeding programs and agronomic research. However, the accurate and scalable quantification of crop biomass remains inaccessible due to limitations in existing measurement methods. One of the obstacles impeding the advancement of current crop biomass prediction methodologies is the scarcity of publicly available datasets. Addressing this gap, we introduce a new dataset in this domain, the Multi-modality dataset for crop biomass estimation (MMCBE). Comprising 216 sets of multi-view drone images coupled with LiDAR point clouds and hand-labelled ground truth, MMCBE is the first multi-modality dataset in this field. It aims to establish benchmark methods for crop biomass quantification and foster the development of vision-based approaches. We have rigorously evaluated state-of-the-art crop biomass estimation methods using MMCBE and ventured into additional potential applications, such as 3D crop reconstruction from drone imagery and novel-view rendering. With this publication, we are making our comprehensive dataset available to the broader community.
   Submitted 12 September, 2024; v1 submitted 17 April, 2024; originally announced April 2024.
   Comments: 10 pages, 10 figures, 3 tables.
href="/search/cs?searchtype=author&query=Govil%2C+D">Devendra Govil</a>, <a href="/search/cs?searchtype=author&query=Hoppe%2C+J">Josef Hoppe</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+M+R">Maneel Reddy Karri</a>, <a href="/search/cs?searchtype=author&query=Khouja%2C+J">Jude Khouja</a>, <a href="/search/cs?searchtype=author&query=Lecha%2C+M">Manuel Lecha</a>, <a href="/search/cs?searchtype=author&query=Livesay%2C+N">Neal Livesay</a>, <a href="/search/cs?searchtype=author&query=Mei%C3%9Fner%2C+J">Jan Mei脽ner</a>, <a href="/search/cs?searchtype=author&query=Mukherjee%2C+S">Soham Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Nikitin%2C+A">Alexander Nikitin</a>, <a href="/search/cs?searchtype=author&query=Papamarkou%2C+T">Theodore Papamarkou</a> , et al. (18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02441v5-abstract-short" style="display: inline;"> We introduce TopoX, a Python software suite that provides reliable and user-friendly building blocks for computing and machine learning on topological domains that extend graphs: hypergraphs, simplicial, cellular, path and combinatorial complexes. TopoX consists of three packages: TopoNetX facilitates constructing and computing on these domains, including working with nodes, edges and higher-order… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02441v5-abstract-full').style.display = 'inline'; document.getElementById('2402.02441v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02441v5-abstract-full" style="display: none;"> We introduce TopoX, a Python software suite that provides reliable and user-friendly building blocks for computing and machine learning on topological domains that extend graphs: hypergraphs, simplicial, cellular, path and combinatorial complexes. TopoX consists of three packages: TopoNetX facilitates constructing and computing on these domains, including working with nodes, edges and higher-order cells; TopoEmbedX provides methods to embed topological domains into vector spaces, akin to popular graph-based embedding algorithms such as node2vec; TopoModelX is built on top of PyTorch and offers a comprehensive toolbox of higher-order message passing functions for neural networks on topological domains. The extensively documented and unit-tested source code of TopoX is available under MIT license at https://pyt-team.github.io/}{https://pyt-team.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02441v5-abstract-full').style.display = 'none'; document.getElementById('2402.02441v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.15188">arXiv:2309.15188</a> <span> [<a href="https://arxiv.org/pdf/2309.15188">pdf</a>, <a href="https://arxiv.org/format/2309.15188">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5281/zenodo.7958513">10.5281/zenodo.7958513 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ICML 2023 Topological Deep Learning Challenge : Design and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Papillon%2C+M">Mathilde Papillon</a>, <a href="/search/cs?searchtype=author&query=Hajij%2C+M">Mustafa Hajij</a>, <a href="/search/cs?searchtype=author&query=Jenne%2C+H">Helen Jenne</a>, <a href="/search/cs?searchtype=author&query=Mathe%2C+J">Johan Mathe</a>, <a href="/search/cs?searchtype=author&query=Myers%2C+A">Audun Myers</a>, <a href="/search/cs?searchtype=author&query=Papamarkou%2C+T">Theodore Papamarkou</a>, <a href="/search/cs?searchtype=author&query=Birdal%2C+T">Tolga Birdal</a>, <a href="/search/cs?searchtype=author&query=Dey%2C+T">Tamal Dey</a>, <a href="/search/cs?searchtype=author&query=Doster%2C+T">Tim Doster</a>, <a href="/search/cs?searchtype=author&query=Emerson%2C+T">Tegan Emerson</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Gurusankar Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Govil%2C+D">Devendra Govil</a>, <a href="/search/cs?searchtype=author&query=Guzm%C3%A1n-S%C3%A1enz%2C+A">Aldo Guzm谩n-S谩enz</a>, <a href="/search/cs?searchtype=author&query=Kvinge%2C+H">Henry Kvinge</a>, <a href="/search/cs?searchtype=author&query=Livesay%2C+N">Neal Livesay</a>, <a href="/search/cs?searchtype=author&query=Mukherjee%2C+S">Soham Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Samaga%2C+S+N">Shreyas N. Samaga</a>, <a href="/search/cs?searchtype=author&query=Ramamurthy%2C+K+N">Karthikeyan Natesan Ramamurthy</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+M+R">Maneel Reddy Karri</a>, <a href="/search/cs?searchtype=author&query=Rosen%2C+P">Paul Rosen</a>, <a href="/search/cs?searchtype=author&query=Sanborn%2C+S">Sophia Sanborn</a>, <a href="/search/cs?searchtype=author&query=Walters%2C+R">Robin Walters</a>, <a href="/search/cs?searchtype=author&query=Agerberg%2C+J">Jens Agerberg</a>, <a href="/search/cs?searchtype=author&query=Barikbin%2C+S">Sadrodin Barikbin</a>, <a href="/search/cs?searchtype=author&query=Battiloro%2C+C">Claudio Battiloro</a> , et al. (31 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.15188v4-abstract-short" style="display: inline;"> This paper presents the computational challenge on topological deep learning that was hosted within the ICML 2023 Workshop on Topology and Geometry in Machine Learning. 
   The competition asked participants to provide open-source implementations of topological neural networks from the literature by contributing to the Python packages TopoNetX (data processing) and TopoModelX (deep learning). The challenge attracted twenty-eight qualifying submissions in its two-month duration. This paper describes the design of the challenge and summarizes its main findings.
   Submitted 18 January, 2024; v1 submitted 26 September, 2023; originally announced September 2023.

11. arXiv:2309.00005 [pdf, other] (cs.CV, cs.LG, eess.IV)
   High Spectral Spatial Resolution Synthetic HyperSpectral Dataset form multi-source fusion
   Authors: Yajie Sun, Ali Zia, Jun Zhou
   Abstract: This research paper introduces a synthetic hyperspectral dataset that combines high spectral and spatial resolution imaging to achieve a comprehensive, accurate, and detailed representation of observed scenes or objects. Obtaining such desirable qualities is challenging when relying on a single camera.
   The proposed dataset addresses this limitation by leveraging three modalities: RGB, push-broom visible hyperspectral camera, and snapshot infrared hyperspectral camera, each offering distinct spatial and spectral resolutions. Different camera systems exhibit varying photometric properties, resulting in a trade-off between spatial and spectral resolution. RGB cameras typically offer high spatial resolution but limited spectral resolution, while hyperspectral cameras possess high spectral resolution at the expense of spatial resolution. Moreover, hyperspectral cameras themselves employ different capturing techniques and spectral ranges, further complicating the acquisition of comprehensive data. By integrating the photometric properties of these modalities, a single synthetic hyperspectral image can be generated, facilitating the exploration of broader spectral-spatial relationships for improved analysis, monitoring, and decision-making across various fields. This paper emphasizes the importance of multi-modal fusion in producing a high-quality synthetic hyperspectral dataset with consistent spectral intervals between bands.
   Submitted 25 June, 2023; originally announced September 2023.
   Comments: IJCNN workshop on Multimodal Synthetic Data for Deep Neural Networks (MSynD), 2023.
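
A rough picture of the spatial/spectral trade-off being fused: upsample a low-spatial-resolution hyperspectral cube to the RGB pixel grid and resample its bands onto a uniform spectral axis. This is a hedged, generic sketch with made-up array sizes, not the paper's fusion method.

```python
# Hedged sketch of the spatial/spectral trade-off behind multi-modal fusion:
# bring a low-spatial-resolution hyperspectral cube onto the RGB pixel grid and
# resample its bands to uniform spectral intervals. All sizes are invented.
import numpy as np
from scipy.ndimage import zoom

rgb = np.random.rand(256, 256, 3)                    # high spatial resolution, 3 bands
hsi = np.random.rand(32, 32, 100)                    # low spatial resolution, 100 bands
band_centers = np.sort(np.random.uniform(400, 1000, 100))   # nm, unevenly spaced

# 1) Spatial upsampling of the hyperspectral cube to the RGB resolution.
scale = (rgb.shape[0] / hsi.shape[0], rgb.shape[1] / hsi.shape[1], 1)
hsi_up = zoom(hsi, scale, order=1)                   # (256, 256, 100)

# 2) Spectral resampling onto a uniform grid (consistent intervals between bands).
uniform_axis = np.arange(400, 1001, 10)              # every 10 nm
hsi_uniform = np.stack(
    [np.interp(uniform_axis, band_centers, px) for px in hsi_up.reshape(-1, 100)]
).reshape(256, 256, uniform_axis.size)

print(hsi_up.shape, hsi_uniform.shape)               # (256, 256, 100) (256, 256, 61)
```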
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IJCNN workshop on Multimodal Synthetic Data for Deep Neural Networks (MSynD), 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.04515">arXiv:2307.04515</a> <span> [<a href="https://arxiv.org/pdf/2307.04515">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SAGC-A68: a space access graph dataset for the classification of spaces and space elements in apartment buildings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ziaee%2C+A">Amir Ziaee</a>, <a href="/search/cs?searchtype=author&query=Suter%2C+G">Georg Suter</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.04515v1-abstract-short" style="display: inline;"> The analysis of building models for usable area, building safety, and energy use requires accurate classification data of spaces and space elements. To reduce input model preparation effort and errors, automated classification of spaces and space elements is desirable. A barrier hindering the utilization of Graph Deep Learning (GDL) methods to space function and space element classification is a l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04515v1-abstract-full').style.display = 'inline'; document.getElementById('2307.04515v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.04515v1-abstract-full" style="display: none;"> The analysis of building models for usable area, building safety, and energy use requires accurate classification data of spaces and space elements. To reduce input model preparation effort and errors, automated classification of spaces and space elements is desirable. A barrier hindering the utilization of Graph Deep Learning (GDL) methods to space function and space element classification is a lack of suitable datasets. To bridge this gap, we introduce a dataset, SAGC-A68, which comprises access graphs automatically generated from 68 digital 3D models of space layouts of apartment buildings. This graph-based dataset is well-suited for developing GDL models for space function and space element classification. To demonstrate the potential of the dataset, we employ it to train and evaluate a graph attention network (GAT) that predicts 22 space function and 6 space element classes. The dataset and code used in the experiment are available online. https://doi.org/10.5281/zenodo.7805872, https://github.com/A2Amir/SAGC-A68. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04515v1-abstract-full').style.display = 'none'; document.getElementById('2307.04515v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in proceedings of the 30th International Workshop on Intelligent Computing in Engineering, EG-ICE 2023, London, England. https://www.ucl.ac.uk/bartlett/construction/sites/bartlett_construction/files/sagc-a68_a_space_access_graph_dataset_for_the_classification_of_spaces_and_space_elements_in_apartment_buildings.pdf</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06283">arXiv:2306.06283</a> <span> [<a href="https://arxiv.org/pdf/2306.06283">pdf</a>, <a href="https://arxiv.org/format/2306.06283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1039/D3DD00113J">10.1039/D3DD00113J <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> 14 Examples of How LLMs Can Transform Materials Science and Chemistry: A Reflection on a Large Language Model Hackathon </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jablonka%2C+K+M">Kevin Maik Jablonka</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Q">Qianxiang Ai</a>, <a href="/search/cs?searchtype=author&query=Al-Feghali%2C+A">Alexander Al-Feghali</a>, <a href="/search/cs?searchtype=author&query=Badhwar%2C+S">Shruti Badhwar</a>, <a href="/search/cs?searchtype=author&query=Bocarsly%2C+J+D">Joshua D. Bocarsly</a>, <a href="/search/cs?searchtype=author&query=Bran%2C+A+M">Andres M Bran</a>, <a href="/search/cs?searchtype=author&query=Bringuier%2C+S">Stefan Bringuier</a>, <a href="/search/cs?searchtype=author&query=Brinson%2C+L+C">L. Catherine Brinson</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+K">Kamal Choudhary</a>, <a href="/search/cs?searchtype=author&query=Circi%2C+D">Defne Circi</a>, <a href="/search/cs?searchtype=author&query=Cox%2C+S">Sam Cox</a>, <a href="/search/cs?searchtype=author&query=de+Jong%2C+W+A">Wibe A. de Jong</a>, <a href="/search/cs?searchtype=author&query=Evans%2C+M+L">Matthew L. Evans</a>, <a href="/search/cs?searchtype=author&query=Gastellu%2C+N">Nicolas Gastellu</a>, <a href="/search/cs?searchtype=author&query=Genzling%2C+J">Jerome Genzling</a>, <a href="/search/cs?searchtype=author&query=Gil%2C+M+V">Mar铆a Victoria Gil</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A+K">Ankur K. 
Gupta</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Z">Zhi Hong</a>, <a href="/search/cs?searchtype=author&query=Imran%2C+A">Alishba Imran</a>, <a href="/search/cs?searchtype=author&query=Kruschwitz%2C+S">Sabine Kruschwitz</a>, <a href="/search/cs?searchtype=author&query=Labarre%2C+A">Anne Labarre</a>, <a href="/search/cs?searchtype=author&query=L%C3%A1la%2C+J">Jakub L谩la</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tao Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Steven Ma</a>, <a href="/search/cs?searchtype=author&query=Majumdar%2C+S">Sauradeep Majumdar</a> , et al. (28 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06283v4-abstract-short" style="display: inline;"> Large-language models (LLMs) such as GPT-4 caught the interest of many scientists. Recent studies suggested that these models could be useful in chemistry and materials science. To explore these possibilities, we organized a hackathon. This article chronicles the projects built as part of this hackathon. Participants employed LLMs for various applications, including predicting properties of mole… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06283v4-abstract-full').style.display = 'inline'; document.getElementById('2306.06283v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06283v4-abstract-full" style="display: none;"> Large-language models (LLMs) such as GPT-4 caught the interest of many scientists. Recent studies suggested that these models could be useful in chemistry and materials science. To explore these possibilities, we organized a hackathon. This article chronicles the projects built as part of this hackathon. Participants employed LLMs for various applications, including predicting properties of molecules and materials, designing novel interfaces for tools, extracting knowledge from unstructured data, and developing new educational applications. The diverse topics and the fact that working prototypes could be generated in less than two days highlight that LLMs will profoundly impact the future of our fields. The rich collection of ideas and projects also indicates that the applications of LLMs are not limited to materials science and chemistry but offer potential benefits to a wide range of scientific disciplines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06283v4-abstract-full').style.display = 'none'; document.getElementById('2306.06283v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16600">arXiv:2305.16600</a> <span> [<a href="https://arxiv.org/pdf/2305.16600">pdf</a>, <a href="https://arxiv.org/format/2305.16600">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Temporal Evolution of Risk Behavior in a Disease Spread Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Langle-Chimal%2C+O+D">Ollin D. Langle-Chimal</a>, <a href="/search/cs?searchtype=author&query=Merrill%2C+S+C">Scott C. Merrill</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+E+M">Eric M. Clark</a>, <a href="/search/cs?searchtype=author&query=Bucini%2C+G">Gabriela Bucini</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tung-Lin Liu</a>, <a href="/search/cs?searchtype=author&query=Shrum%2C+T+R">Trisha R. Shrum</a>, <a href="/search/cs?searchtype=author&query=Koliba%2C+C">Christopher Koliba</a>, <a href="/search/cs?searchtype=author&query=Zia%2C+A">Asim Zia</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+J+M">Julia M. Smith</a>, <a href="/search/cs?searchtype=author&query=Cheney%2C+N">Nicholas Cheney</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16600v2-abstract-short" style="display: inline;"> Human behavior is a dynamic process that evolves with experience. Understanding the evolution of individual's risk propensity is critical to design public health interventions to propitiate the adoption of better biosecurity protocols and thus, prevent the transmission of an infectious disease. Using an experimental game that simulates the spread of a disease in a network of porcine farms, we meas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16600v2-abstract-full').style.display = 'inline'; document.getElementById('2305.16600v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16600v2-abstract-full" style="display: none;"> Human behavior is a dynamic process that evolves with experience. Understanding the evolution of individual's risk propensity is critical to design public health interventions to propitiate the adoption of better biosecurity protocols and thus, prevent the transmission of an infectious disease. Using an experimental game that simulates the spread of a disease in a network of porcine farms, we measure how learning from experience affects the risk aversion of over $1000$ players. We used a fully automated approach to segment the players into 4 categories based on the temporal trends of their game plays and compare the outcomes of their overall game performance. We found that the risk tolerant group is $50\%$ more likely to incur an infection than the risk averse one. We also find that while all individuals decrease the amount of time it takes to make decisions as they become more experienced at the game, we find a group of players with constant decision strategies who rapidly decrease their time to make a decision and a second context-aware decision group that contemplates longer before decisions while presumably performing a real-time risk assessment. 
The behavioral strategies employed by players in this simulated setting could be used in the future as an early warning signal to identify undesirable biosecurity-related risk aversion preferences, or changes in behavior, which may allow for targeted interventions to help mitigate them. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16600v2-abstract-full').style.display = 'none'; document.getElementById('2305.16600v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 1 table, 7 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> ACM-class: F.2.2; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16555">arXiv:2305.16555</a> <span> [<a href="https://arxiv.org/pdf/2305.16555">pdf</a>, <a href="https://arxiv.org/format/2305.16555">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CVB: A Video Dataset of Cattle Visual Behaviors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zia%2C+A">Ali Zia</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+R">Renuka Sharma</a>, <a href="/search/cs?searchtype=author&query=Arablouei%2C+R">Reza Arablouei</a>, <a href="/search/cs?searchtype=author&query=Bishop-Hurley%2C+G">Greg Bishop-Hurley</a>, <a href="/search/cs?searchtype=author&query=McNally%2C+J">Jody McNally</a>, <a href="/search/cs?searchtype=author&query=Bagnall%2C+N">Neil Bagnall</a>, <a href="/search/cs?searchtype=author&query=Rolland%2C+V">Vivien Rolland</a>, <a href="/search/cs?searchtype=author&query=Kusy%2C+B">Brano Kusy</a>, <a href="/search/cs?searchtype=author&query=Petersson%2C+L">Lars Petersson</a>, <a href="/search/cs?searchtype=author&query=Ingham%2C+A">Aaron Ingham</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16555v2-abstract-short" style="display: inline;"> Existing image/video datasets for cattle behavior recognition are mostly small, lack well-defined labels, or are collected in unrealistic controlled environments. This limits the utility of machine learning (ML) models learned from them. 
Therefore, we introduce a new dataset, called Cattle Visual Behaviors (CVB), that consists of 502 video clips, each fifteen seconds long, captured in natural ligh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16555v2-abstract-full').style.display = 'inline'; document.getElementById('2305.16555v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16555v2-abstract-full" style="display: none;"> Existing image/video datasets for cattle behavior recognition are mostly small, lack well-defined labels, or are collected in unrealistic controlled environments. This limits the utility of machine learning (ML) models learned from them. Therefore, we introduce a new dataset, called Cattle Visual Behaviors (CVB), that consists of 502 video clips, each fifteen seconds long, captured in natural lighting conditions, and annotated with eleven visually perceptible behaviors of grazing cattle. We use the Computer Vision Annotation Tool (CVAT) to collect our annotations. To make the procedure more efficient, we perform an initial detection and tracking of cattle in the videos using appropriate pre-trained models. The results are corrected by domain experts along with cattle behavior labeling in CVAT. The pre-hoc detection and tracking step significantly reduces the manual annotation time and effort. Moreover, we convert CVB to the atomic visual action (AVA) format and train and evaluate the popular SlowFast action recognition model on it. The associated preliminary results confirm that we can localize the cattle and recognize their frequently occurring behaviors with confidence. By creating and sharing CVB, our aim is to develop improved models capable of recognizing all important behaviors accurately and to assist other researchers and practitioners in developing and evaluating new ML models for cattle behavior classification using video data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16555v2-abstract-full').style.display = 'none'; document.getElementById('2305.16555v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
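The CVB abstract above mentions converting the annotations to the atomic visual action (AVA) format before training SlowFast. As a hedged illustration of what that conversion involves (the `clips` structure and field values below are invented placeholders, not the released CVB tooling or data), AVA-style ground truth is a CSV with one row per bounding box per annotated keyframe:

```python
# Hypothetical sketch of writing AVA-style action annotations.
# AVA CSV rows are: video_id, frame_timestamp, x1, y1, x2, y2, action_id, entity_id,
# with box coordinates normalized to [0, 1]. The `clips` list below is a made-up
# placeholder standing in for corrected CVAT exports, not the CVB release format.
import csv

clips = [
    {"video_id": "clip_0001", "timestamp": 7, "box": (120, 80, 360, 420),
     "width": 1280, "height": 720, "action_id": 3, "track_id": 1},
]

with open("cvb_ava_train.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for ann in clips:
        x1, y1, x2, y2 = ann["box"]
        w, h = ann["width"], ann["height"]
        writer.writerow([
            ann["video_id"], ann["timestamp"],
            round(x1 / w, 3), round(y1 / h, 3),
            round(x2 / w, 3), round(y2 / h, 3),
            ann["action_id"], ann["track_id"],
        ])
```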
arXiv:2305.07152  [pdf, other]  cs.CV
Surgical tool classification and localization: results and methods from the MICCAI 2022 SurgToolLoc challenge
Authors: Aneeq Zia, Kiran Bhattacharyya, Xi Liu, Max Berniker, Ziheng Wang, Rogerio Nespolo, Satoshi Kondo, Satoshi Kasai, Kousuke Hirasawa, Bo Liu, David Austin, Yiheng Wang, Michal Futrega, Jean-Francois Puget, Zhenqiang Li, Yoichi Sato, Ryo Fujii, Ryo Hachiuma, Mana Masuda, Hideo Saito, An Wang, Mengya Xu, Mobarakol Islam, Long Bai, Winnie Pang, et al. (46 additional authors not shown)
Abstract: The ability to automatically detect and track surgical instruments in endoscopic videos can enable transformational interventions. Assessing surgical performance and efficiency, identifying skilled tool use and choreography, and planning operational and logistical aspects of OR resources are just a few of the applications that could benefit. Unfortunately, obtaining the annotations needed to train machine learning models to identify and localize surgical tools is a difficult task. Annotating bounding boxes frame-by-frame is tedious and time-consuming, yet large amounts of data with a wide variety of surgical tools and surgeries must be captured for robust training. Moreover, ongoing annotator training is needed to stay up to date with surgical instrument innovation. In robotic-assisted surgery, however, potentially informative data like timestamps of instrument installation and removal can be programmatically harvested. The ability to rely on tool installation data alone would significantly reduce the workload to train robust tool-tracking models. With this motivation in mind we invited the surgical data science community to participate in the challenge, SurgToolLoc 2022. The goal was to leverage tool presence data as weak labels for machine learning models trained to detect tools and localize them in video frames with bounding boxes. We present the results of this challenge along with many of the teams' efforts. We conclude by discussing these results in the broader context of machine learning and surgical data science. The training data used for this challenge, consisting of 24,695 video clips with tool presence labels, is also being released publicly and can be accessed at https://console.cloud.google.com/storage/browser/isi-surgtoolloc-2022.
Submitted 31 May, 2023; v1 submitted 11 May, 2023; originally announced May 2023.

arXiv:2305.04400  [pdf]  cs.AI cs.CL q-bio.NC
Do Large Language Models Show Decision Heuristics Similar to Humans? A Case Study Using GPT-3.5
Authors: Gaurav Suri, Lily R. Slater, Ali Ziaee, Morgan Nguyen
Abstract: A Large Language Model (LLM) is an artificial intelligence system that has been trained on vast amounts of natural language data, enabling it to generate human-like responses to written or spoken language input. GPT-3.5 is an example of an LLM that supports a conversational agent called ChatGPT. In this work, we used a series of novel prompts to determine whether ChatGPT shows heuristics, biases, and other decision effects. We also tested the same prompts on human participants. Across four studies, we found that ChatGPT was influenced by random anchors in making estimates (Anchoring Heuristic, Study 1); it judged the likelihood of two events occurring together to be higher than the likelihood of either event occurring alone, and it was erroneously influenced by salient anecdotal information (Representativeness and Availability Heuristic, Study 2); it found an item to be more efficacious when its features were presented positively rather than negatively, even though both presentations contained identical information (Framing Effect, Study 3); and it valued an owned item more than a newly found item even though the two items were identical (Endowment Effect, Study 4). In each study, human participants showed similar effects. Heuristics and related decision effects in humans are thought to be driven by cognitive and affective processes such as loss aversion and effort reduction. The fact that an LLM, which lacks these processes, also shows such effects invites consideration of the possibility that language may play a role in generating these effects in humans.
Submitted 7 May, 2023; originally announced May 2023.

arXiv:2303.17719  [pdf, other]  cs.CV cs.LG
Why is the winner the best?
Authors: Matthias Eisenmann, Annika Reinke, Vivienn Weru, Minu Dietlinde Tizabi, Fabian Isensee, Tim J. Adler, Sharib Ali, Vincent Andrearczyk, Marc Aubreville, Ujjwal Baid, Spyridon Bakas, Niranjan Balu, Sophia Bano, Jorge Bernal, Sebastian Bodenstedt, Alessandro Casella, Veronika Cheplygina, Marie Daum, Marleen de Bruijne, Adrien Depeursinge, Reuben Dorent, Jan Egger, David G. Ellis, Sandy Engelhardt, Melanie Ganz, et al. (100 additional authors not shown)
Abstract: International benchmarking competitions have become fundamental for the comparative performance assessment of image analysis methods. However, little attention has been given to investigating what can be learnt from these competitions. Do they really generate scientific progress? What are common and successful participation strategies? What makes a solution superior to a competing method? To address this gap in the literature, we performed a multi-center study with all 80 competitions that were conducted in the scope of IEEE ISBI 2021 and MICCAI 2021. Statistical analyses performed based on comprehensive descriptions of the submitted algorithms linked to their rank as well as the underlying participation strategies revealed common characteristics of winning solutions. These typically include the use of multi-task learning (63%) and/or multi-stage pipelines (61%), and a focus on augmentation (100%), image preprocessing (97%), data curation (79%), and postprocessing (66%). The "typical" lead of a winning team is a computer scientist with a doctoral degree, five years of experience in biomedical image analysis, and four years of experience in deep learning. Two core general development strategies stood out for highly-ranked teams: the reflection of the metrics in the method design and the focus on analyzing and handling failure cases. According to the organizers, 43% of the winning algorithms exceeded the state of the art but only 11% completely solved the respective domain problem. The insights of our study could help researchers (1) improve algorithm development strategies when approaching new problems, and (2) focus on open research questions revealed by this work.
Submitted 30 March, 2023; originally announced March 2023.
Comments: accepted to CVPR 2023

arXiv:2302.08054  [pdf, other]  cs.CV cs.AI
Spectral 3D Computer Vision -- A Review
Authors: Yajie Sun, Ali Zia, Vivien Rolland, Charissa Yu, Jun Zhou
Abstract: Spectral 3D computer vision examines both the geometric and spectral properties of objects. It provides a deeper understanding of an object's physical properties by providing information from narrow bands in various regions of the electromagnetic spectrum. Mapping the spectral information onto the 3D model reveals changes in the spectra-structure space or enhances 3D representations with properties such as reflectance, chromatic aberration, and varying defocus blur. This emerging paradigm advances traditional computer vision and opens new avenues of research in 3D structure, depth estimation, motion analysis, and more. It has found applications in areas such as smart agriculture, environment monitoring, building inspection, geological exploration, and digital cultural heritage records. This survey offers a comprehensive overview of spectral 3D computer vision, including a unified taxonomy of methods, key application areas, and future challenges and prospects.
Submitted 15 February, 2023; originally announced February 2023.

arXiv:2302.03836  [pdf, other]  cs.LG cs.AI  doi: 10.1007/s10462-024-10710-9
Topological Deep Learning: A Review of an Emerging Paradigm
Authors: Ali Zia, Abdelwahed Khamis, James Nichols, Zeeshan Hayder, Vivien Rolland, Lars Petersson
Abstract: Topological data analysis (TDA) provides insight into data shape. The summaries obtained by these methods are principled global descriptions of multi-dimensional data whilst exhibiting stable properties such as robustness to deformation and noise. Such properties are desirable in deep learning pipelines, but they are typically obtained using non-TDA strategies. This is partly caused by the difficulty of combining TDA constructs (e.g. barcode and persistence diagrams) with current deep learning algorithms. Fortunately, we are now witnessing a growth of deep learning applications embracing topologically-guided components. In this survey, we review the nascent field of topological deep learning by first revisiting the core concepts of TDA. We then explore how the use of TDA techniques has evolved over time to support deep learning frameworks, and how they can be integrated into different aspects of deep learning. Furthermore, we touch on TDA usage for analyzing existing deep models (deep topological analytics). Finally, we discuss the challenges and future prospects of topological deep learning.
Submitted 7 February, 2023; originally announced February 2023.
Comments: 7 pages and 2 references
arXiv:2212.08568  [pdf, other]  cs.CV cs.LG
Biomedical image analysis competitions: The state of current participation practice
Authors: Matthias Eisenmann, Annika Reinke, Vivienn Weru, Minu Dietlinde Tizabi, Fabian Isensee, Tim J. Adler, Patrick Godau, Veronika Cheplygina, Michal Kozubek, Sharib Ali, Anubha Gupta, Jan Kybic, Alison Noble, Carlos Ortiz de Solórzano, Samiksha Pachade, Caroline Petitjean, Daniel Sage, Donglai Wei, Elizabeth Wilden, Deepak Alapatt, Vincent Andrearczyk, Ujjwal Baid, Spyridon Bakas, Niranjan Balu, Sophia Bano, et al. (331 additional authors not shown)
Abstract: The number of international benchmarking competitions is steadily increasing in various fields of machine learning (ML) research and practice. So far, however, little is known about the common practice as well as bottlenecks faced by the community in tackling the research questions posed. To shed light on the status quo of algorithm development in the specific field of biomedical imaging analysis, we designed an international survey that was issued to all participants of challenges conducted in conjunction with the IEEE ISBI 2021 and MICCAI 2021 conferences (80 competitions in total). The survey covered participants' expertise and working environments, their chosen strategies, as well as algorithm characteristics. A median of 72% challenge participants took part in the survey. According to our results, knowledge exchange was the primary incentive (70%) for participation, while the reception of prize money played only a minor role (16%). While a median of 80 working hours was spent on method development, a large portion of participants stated that they did not have enough time for method development (32%). 25% perceived the infrastructure to be a bottleneck. Overall, 94% of all solutions were deep learning-based. Of these, 84% were based on standard architectures. 43% of the respondents reported that the data samples (e.g., images) were too large to be processed at once. This was most commonly addressed by patch-based training (69%), downsampling (37%), and solving 3D analysis tasks as a series of 2D tasks. K-fold cross-validation on the training set was performed by only 37% of the participants and only 50% of the participants performed ensembling based on multiple identical models (61%) or heterogeneous models (39%). 48% of the respondents applied postprocessing steps.
Submitted 12 September, 2023; v1 submitted 16 December, 2022; originally announced December 2022.

arXiv:2212.04448  [pdf, other]  cs.CV
Objective Surgical Skills Assessment and Tool Localization: Results from the MICCAI 2021 SimSurgSkill Challenge
Authors: Aneeq Zia, Kiran Bhattacharyya, Xi Liu, Ziheng Wang, Max Berniker, Satoshi Kondo, Emanuele Colleoni, Dimitris Psychogyios, Yueming Jin, Jinfan Zhou, Evangelos Mazomenos, Lena Maier-Hein, Danail Stoyanov, Stefanie Speidel, Anthony Jarc
Abstract: Timely and effective feedback within surgical training plays a critical role in developing the skills required to perform safe and efficient surgery. Feedback from expert surgeons, while especially valuable in this regard, is challenging to acquire due to their typically busy schedules, and may be subject to biases. Formal assessment procedures like OSATS and GEARS attempt to provide objective measures of skill, but remain time-consuming. With advances in machine learning there is an opportunity for fast and objective automated feedback on technical skills. The SimSurgSkill 2021 challenge (hosted as a sub-challenge of EndoVis at MICCAI 2021) aimed to promote and foster work in this endeavor. Using virtual reality (VR) surgical tasks, competitors were tasked with localizing instruments and predicting surgical skill. Here we summarize the winning approaches and how they performed. Using this publicly available dataset and results as a springboard, future work may enable more efficient training of surgeons with advances in surgical data science. The dataset can be accessed from https://console.cloud.google.com/storage/browser/isi-simsurgskill-2021.
Submitted 8 December, 2022; originally announced December 2022.
Comments: arXiv admin note: substantial text overlap with arXiv:1910.04071

arXiv:2209.09094  [pdf]  cs.CV cs.AI cs.LG
SFS-A68: a dataset for the segmentation of space functions in apartment buildings
Authors: Amir Ziaee, Georg Suter
Abstract: Analyzing building models for usable area, building safety, or energy analysis requires function classification data of spaces and related objects. Automated space function classification is desirable to reduce input model preparation effort and errors. Existing space function classifiers use space feature vectors or space connectivity graphs as input. The application of deep learning (DL) image segmentation methods to space function classification has not been studied. As an initial step towards addressing this gap, we present a dataset, SFS-A68, that consists of input and ground truth images generated from 68 digital 3D models of space layouts of apartment buildings. The dataset is suitable for developing DL models for space function segmentation. We use the dataset to train and evaluate an experimental space function segmentation network based on transfer learning and training from scratch. Test results confirm the applicability of DL image segmentation for space function classification. The code and the dataset of the experiments are publicly available online (https://github.com/A2Amir/SFS-A68).
Submitted 13 September, 2022; originally announced September 2022.
Comments: Published in proceedings of the 29th International Workshop on Intelligent Computing in Engineering, EG-ICE 2022, Aarhus, Denmark. https://doi.org/10.7146/aul.455.c222
Journal ref: Teizer, Jochen & Schultz, Carl. (2022). Proceedings of the 29th EG-ICE International Workshop on Intelligent Computing in Engineering: Frontmatter and Backmatter. 1-8. 10.7146/aul.455.c191
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.09094v1-abstract-full').style.display = 'none'; document.getElementById('2209.09094v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in proceedings of the 29th International Workshop on Intelligent Computing in Engineering, EG-ICE 2022, Aarhus, Denmark. https://doi.org/10.7146/aul.455.c222</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Teizer, Jochen & Schultz, Carl. (2022). Proceedings of the 29th EG-ICE International Workshop on Intelligent Computing in Engineering: Frontmatter and Backmatter. 1-8. 10.7146/aul.455.c191 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.08898">arXiv:2209.08898</a> <span> [<a href="https://arxiv.org/pdf/2209.08898">pdf</a>, <a href="https://arxiv.org/format/2209.08898">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3571560.3571566">10.1145/3571560.3571566 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Batch Layer Normalization, A new normalization layer for CNNs and RNN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ziaee%2C+A">Amir Ziaee</a>, <a href="/search/cs?searchtype=author&query=%C3%87ano%2C+E">Erion 脟ano</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.08898v1-abstract-short" style="display: inline;"> This study introduces a new normalization layer termed Batch Layer Normalization (BLN) to reduce the problem of internal covariate shift in deep neural network layers. 
arXiv:2209.08898 (https://arxiv.org/abs/2209.08898) [pdf, other]
Categories: cs.LG, cs.AI, cs.NE
DOI: 10.1145/3571560.3571566

Batch Layer Normalization, A new normalization layer for CNNs and RNN

Authors: Amir Ziaee, Erion Çano

Abstract: This study introduces a new normalization layer termed Batch Layer Normalization (BLN) to reduce the problem of internal covariate shift in deep neural network layers. As a combined version of batch and layer normalization, BLN adaptively puts appropriate weight on mini-batch and feature normalization, based on the inverse size of mini-batches, to normalize the input to a layer during the learning process. It performs the exact computation with a minor change at inference time, using either mini-batch statistics or population statistics. The choice between mini-batch and population statistics gives BLN a useful role in the hyper-parameter optimization of models. A key advantage of BLN is theoretical support for its independence from the input data, while its statistical configuration depends heavily on the task performed, the amount of training data, and the batch size. Test results indicate the application potential of BLN and its faster convergence than batch normalization and layer normalization in both convolutional and recurrent neural networks. The code of the experiments is publicly available online (https://github.com/A2Amir/Batch-Layer-Normalization).

Submitted 19 September, 2022; originally announced September 2022.
Comments: Published in proceedings of the 6th International Conference on Advances in Artificial Intelligence, ICAAI 2022, Birmingham, UK
arXiv:2103.00286 (https://arxiv.org/abs/2103.00286) [pdf, other]
Categories: cs.CV, cs.LG, eess.IV, math.NA

A Novel Adaptive Deep Network for Building Footprint Segmentation

Authors: A. Ziaee, R. Dehbozorgi, M. Döller

Abstract: Building footprint segmentations for high-resolution images are increasingly demanded for many remote sensing applications. With emerging deep learning approaches, segmentation networks have made significant advances in the semantic segmentation of objects. However, these advances and the increased access to satellite images require the generation of accurate object boundaries in satellite images. In this paper, we propose a novel network based on the Pix2Pix methodology to solve the problem of inaccurate boundaries obtained when converting satellite images into maps with segmentation networks, in order to segment building footprints. In the proposed network, named G2G, our framework includes two generators: the first generator extracts localization features, which are merged with the boundary features extracted by the second generator to segment all detailed building edges. Moreover, different strategies are implemented to enhance the quality of the results, and the proposed network outperforms state-of-the-art networks in segmentation accuracy by a large margin on all evaluation metrics. The implementation is available at https://github.com/A2Amir/A-Novel-Adaptive-Deep-Network-for-Building-Footprint-Segmentation.

Submitted 27 February, 2021; originally announced March 2021.
Comments: Deep Learning Semantic Segmentation, Building Footprint Segmentation, Conditional Generative Adversarial Networks (CGANs), Pix2Pix Network
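The abstract outlines G2G as two Pix2Pix-style generators whose localization and boundary features are merged before the final footprint prediction. The PyTorch sketch below is only a schematic of that feature merge; the tiny generator blocks, layer sizes, and fusion head are all assumptions, not the published architecture:

    import torch
    import torch.nn as nn

    class TinyGenerator(nn.Module):
        """A deliberately small encoder-decoder stand-in for a Pix2Pix-style generator."""
        def __init__(self, in_ch=3, feat_ch=32):
            super().__init__()
            self.encoder = nn.Sequential(
                nn.Conv2d(in_ch, feat_ch, 3, padding=1), nn.ReLU(),
                nn.Conv2d(feat_ch, feat_ch, 3, padding=1), nn.ReLU(),
            )
            self.decoder = nn.Conv2d(feat_ch, feat_ch, 3, padding=1)

        def forward(self, x):
            return self.decoder(self.encoder(x))

    class G2GSketch(nn.Module):
        """Hypothetical merge of localization and boundary features into one footprint mask."""
        def __init__(self, feat_ch=32):
            super().__init__()
            self.localization_gen = TinyGenerator(feat_ch=feat_ch)  # first generator
            self.boundary_gen = TinyGenerator(feat_ch=feat_ch)      # second generator
            self.head = nn.Conv2d(2 * feat_ch, 1, kernel_size=1)    # fuse and predict the mask

        def forward(self, image):
            loc = self.localization_gen(image)      # where buildings are
            edge = self.boundary_gen(image)         # where their edges are
            fused = torch.cat([loc, edge], dim=1)   # merge the two feature maps
            return torch.sigmoid(self.head(fused))  # per-pixel footprint probability

    mask = G2GSketch()(torch.randn(1, 3, 128, 128))
    print(mask.shape)  # torch.Size([1, 1, 128, 128])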
arXiv:2102.13644 (https://arxiv.org/abs/2102.13644) [pdf, other]
Categories: cs.CV

Surgical Visual Domain Adaptation: Results from the MICCAI 2020 SurgVisDom Challenge

Authors: Aneeq Zia, Kiran Bhattacharyya, Xi Liu, Ziheng Wang, Satoshi Kondo, Emanuele Colleoni, Beatrice van Amsterdam, Razeen Hussain, Raabid Hussain, Lena Maier-Hein, Danail Stoyanov, Stefanie Speidel, Anthony Jarc

Abstract: Surgical data science is revolutionizing minimally invasive surgery by enabling context-aware applications. However, many challenges exist around surgical data (and health data, more generally) needed to develop context-aware models. This work, presented as part of the Endoscopic Vision (EndoVis) challenge at the Medical Image Computing and Computer Assisted Intervention (MICCAI) 2020 conference, explores the potential for visual domain adaptation in surgery to overcome data privacy concerns. In particular, we propose to use video from virtual reality (VR) simulations of surgical exercises in robotic-assisted surgery to develop algorithms that recognize tasks in a clinical-like setting. We present the performance of the different approaches to visual domain adaptation developed by challenge participants. Our analysis shows that the presented models were unable to learn meaningful motion-based features from VR data alone, but did significantly better when a small amount of clinical-like data was also made available. Based on these results, we discuss promising methods and further work to address the problem of visual domain adaptation in surgical data science. We also release the challenge dataset publicly at https://www.synapse.org/surgvisdom2020.

Submitted 26 February, 2021; originally announced February 2021.
Comments: Results from the SurgVisDom 2020 challenge held at MICCAI 2020

arXiv:2008.01663 (https://arxiv.org/abs/2008.01663) [pdf, other]
Categories: cs.CL, cs.LG

Efficient Urdu Caption Generation using Attention based LSTM

Authors: Inaam Ilahi, Hafiz Muhammad Abdullah Zia, Muhammad Ahtazaz Ahsan, Rauf Tabassam, Armaghan Ahmed

Abstract: Recent advancements in deep learning have created many opportunities to solve real-world problems that remained unsolved for more than a decade. Automatic caption generation is a major research field, and the research community has done a lot of work on it in the most common languages, such as English. Urdu is the national language of Pakistan and is also widely spoken and understood in the subcontinent region of Pakistan and India, yet no work has been done on Urdu-language caption generation. Our research aims to fill this gap by developing an attention-based deep learning model using sequence-modeling techniques specialized for the Urdu language. We have prepared a dataset in the Urdu language by translating a subset of the "Flickr8k" dataset containing 700 'man' images. We evaluate our proposed technique on this dataset and show that it can achieve a BLEU score of 0.83 in the Urdu language. We improve on the previous state of the art by using better CNN architectures and optimization techniques. Furthermore, we provide a discussion on how the generated captions can be made grammatically correct.

Submitted 19 June, 2021; v1 submitted 2 August, 2020; originally announced August 2020.
Comments: This is a project report for the Deep Learning course taught at Information Technology University, Lahore, Pakistan, by Dr. Mohsen Ali
arXiv:1910.12978 (https://arxiv.org/abs/1910.12978) [pdf, other]
Categories: cs.CY
DOI: 10.3389/fvets.2020.00130

Effects of Social Cues on Biosecurity Compliance in Livestock Facilities: Evidence from Experimental Simulations

Authors: Luke Trinity, Scott C. Merrill, Eric Clark, Christopher J. Koliba, Asim Zia, Gabriela Bucini, Julia M. Smith

Abstract: Disease outbreaks in U.S. animal livestock industries have economic impacts measured in hundreds of millions of dollars per year. Biosecurity, or procedures intended to protect animals against disease, is known to be effective at reducing infection risk at facilities. Yet, to the detriment of animal health, humans do not always follow biosecurity protocols. Human behavioral factors have been shown to influence willingness to follow biosecurity protocols. Here we show how social cues may affect cooperation with a biosecurity practice. Participants were immersed in a simulated swine production facility through a graphical user interface and prompted to make a decision that addressed their willingness to comply with a biosecurity practice. We tested the effect of varying three experimental variables: (1) the risk of acquiring an infection, (2) the delivery method of the infection risk information (numerical versus graphical), and (3) the behavior of an automated coworker in the facility. We provide evidence that participants changed their behavior when they observed a simulated worker choosing to follow or not follow a biosecurity protocol, even though the simulated worker had no economic effect on the participants' payouts. These results advance the understanding of human behavioral effects on biosecurity protocol decisions, demonstrating that social cues need to be considered by livestock facility managers when developing policies to make agricultural systems more disease resilient.

Submitted 28 October, 2019; originally announced October 2019.
Comments: 30 pages, 4 figures, 6 tables

arXiv:1909.12905 (https://arxiv.org/abs/1909.12905) [pdf, other]
Categories: cs.CY
DOI: 10.1371/journal.pone.0228983

Using Digital Field Experiments To Elicit Risk Mitigation Behavioral Strategies For Disease Management Across Agricultural Production Systems

Authors: Eric M. Clark, Scott C. Merrill, Luke Trinity, Gabriela Bucini, Nicholas Cheney, Ollin Langle-Chimal, Trisha Shrum, Christopher Koliba, Asim Zia, Julia M. Smith

Abstract: Failing to mitigate the propagation of disease can have dire economic consequences for agricultural networks. Pathogens like Porcine Epidemic Diarrhea virus can quickly spread among producers. Biosecurity is designed to prevent infection transmission. When considering biosecurity investments, management must balance the cost of protection against the consequences of contracting an infection. Thus, an examination of the decision-making processes associated with investment in biosecurity is important for enhancing system-wide biosecurity. Data gathered from digital field experiments can provide insights into behavioral strategies and inform the development of decision support systems. We created an online digital experiment to simulate outbreak scenarios among swine production supply chains, where participants were tasked with making biosecurity investment decisions. In Experiment One, we quantified the risk associated with each participant's decisions and delineated three dominant categories of risk attitudes: risk averse, risk tolerant, and opportunistic. Each risk class exhibited unique approaches in reaction to risk and disease information. We also tested how information uncertainty affects risk aversion by varying the visibility of the infection as well as the amount of biosecurity implemented across the system. We found evidence that more visibility into the number of infected sites increases risk-averse behaviors, while more visibility into the amount of neighboring biosecurity increases risk-taking behaviors. In Experiment Two, we were surprised to find no evidence for differences in behavior between livestock specialists and Amazon Mechanical Turk participants. Our findings provide support for using digital field experiments to study how risk communication affects behavior, which can provide insights toward more effective messaging strategies.

Submitted 1 October, 2019; v1 submitted 20 September, 2019; originally announced September 2019.

arXiv:1907.02060 (https://arxiv.org/abs/1907.02060) [pdf, ps, other]
Categories: cs.CV, eess.IV

Novel evaluation of surgical activity recognition models using task-based efficiency metrics

Authors: Aneeq Zia, Liheng Guo, Linlin Zhou, Irfan Essa, Anthony Jarc

Abstract: Purpose: Surgical task-based metrics (rather than entire-procedure metrics) can be used to improve surgeon training and, ultimately, patient care through focused training interventions. Machine learning models to automatically recognize individual tasks or activities are needed to overcome the otherwise manual effort of video review. Traditionally, these models have been evaluated using frame-level accuracy. Here, we propose evaluating surgical activity recognition models by their effect on task-based efficiency metrics. In this way, we can determine when models have achieved adequate performance for providing surgeon feedback via metrics from individual tasks. Methods: We propose a new CNN-LSTM model, RP-Net-V2, to recognize the 12 steps of robotic-assisted radical prostatectomies (RARP). We evaluated our model both with conventional methods (e.g. Jaccard Index, task boundary accuracy) and in novel ways, such as the accuracy of efficiency metrics computed from instrument movements and system events. Results: Our proposed model achieves a Jaccard Index of 0.85, thereby outperforming previous models on robotic-assisted radical prostatectomies. Additionally, we show that metrics computed from tasks automatically identified using RP-Net-V2 correlate well with metrics from tasks labeled by clinical experts. Conclusions: We demonstrate that metrics-based evaluation of surgical activity recognition models is a viable approach to determine when models can be used to quantify surgical efficiencies. We believe this approach and our results illustrate the potential for fully automated, post-operative efficiency reports.

Submitted 3 July, 2019; originally announced July 2019.
Journal ref: International Journal of Computer Assisted Radiology and Surgery (IJCARS) 2019
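The reported Jaccard Index of 0.85 summarizes agreement between predicted and expert-labeled task segments. As a small illustration of the metric itself (not of RP-Net-V2), the following computes a per-task Jaccard index over frame-wise labels and averages it:

    import numpy as np

    def mean_jaccard(pred, truth):
        """Average per-task Jaccard index between two frame-wise task label sequences."""
        pred, truth = np.asarray(pred), np.asarray(truth)
        scores = []
        for task in np.unique(truth):
            p, t = pred == task, truth == task
            union = np.logical_or(p, t).sum()
            scores.append(np.logical_and(p, t).sum() / union if union else 1.0)
        return float(np.mean(scores))

    # Toy 10-frame sequence with three tasks (0, 1, 2); real input would be per-video frame labels.
    print(mean_jaccard([0, 0, 1, 1, 1, 2, 2, 2, 2, 2],
                       [0, 0, 0, 1, 1, 1, 2, 2, 2, 2]))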
arXiv:1806.00466 (https://arxiv.org/abs/1806.00466) [pdf, other]
Categories: cs.CV

Surgical Activity Recognition in Robot-Assisted Radical Prostatectomy using Deep Learning

Authors: Aneeq Zia, Andrew Hung, Irfan Essa, Anthony Jarc

Abstract: Adverse surgical outcomes are costly to patients and hospitals. Approaches to benchmark surgical care are often limited to gross measures across the entire procedure, despite the performance of particular tasks being largely responsible for undesirable outcomes. In order to produce metrics from tasks as opposed to the whole procedure, methods to automatically recognize individual surgical tasks are needed. In this paper, we propose several approaches to recognize surgical activities in robot-assisted minimally invasive surgery using deep learning. We collected a clinical dataset of 100 robot-assisted radical prostatectomies (RARP) with 12 tasks each and propose 'RP-Net', a modified version of the InceptionV3 model, for image-based surgical activity recognition. We achieve an average precision of 80.9% and an average recall of 76.7% across all tasks using RP-Net, which outperforms all other RNN- and CNN-based models explored in this paper. Our results suggest that automatic surgical activity recognition during RARP is feasible and can be the foundation for advanced analytics.

Submitted 1 June, 2018; originally announced June 2018.
Comments: Accepted at MICCAI 2018
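RP-Net is described as a modified InceptionV3 classifier over the 12 RARP tasks; the specific modifications are in the paper, so the snippet below only shows the generic starting point of attaching a 12-way head to a pretrained InceptionV3 (an assumed setup, not the authors' code):

    import torch.nn as nn
    from torchvision.models import inception_v3

    NUM_TASKS = 12  # the 12 RARP tasks mentioned in the abstract

    # Generic baseline: ImageNet-pretrained InceptionV3 with its main and auxiliary
    # classifiers replaced by 12-way task heads.
    model = inception_v3(weights="DEFAULT")
    model.fc = nn.Linear(model.fc.in_features, NUM_TASKS)
    model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, NUM_TASKS)

Per-frame predictions from such a classifier would then be aggregated over time to label whole tasks, as the abstract implies for video review.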
arXiv:1712.08604 (https://arxiv.org/abs/1712.08604) [pdf, other]
Categories: cs.CV

Automated Surgical Skill Assessment in RMIS Training

Authors: Aneeq Zia, Irfan Essa

Abstract: Purpose: Manual feedback in basic RMIS training can consume a significant amount of time from expert surgeons' schedules and is prone to subjectivity. While VR-based training tasks can generate automated score reports, there is no mechanism for generating automated feedback for surgeons performing basic surgical tasks in RMIS training. In this paper, we explore the use of different holistic features for automated skill assessment using only robot kinematic data and propose a weighted feature fusion technique for improving score prediction performance. Methods: We perform our experiments on the publicly available JIGSAWS dataset and evaluate four different types of holistic features from robot kinematic data: Sequential Motion Texture (SMT), Discrete Fourier Transform (DFT), Discrete Cosine Transform (DCT), and Approximate Entropy (ApEn). The features are then used for skill classification and exact skill score prediction. Along with using these features individually, we also evaluate performance using our proposed weighted combination technique. Results: Our results demonstrate that these holistic features outperform all previous HMM-based state-of-the-art methods for skill classification on the JIGSAWS dataset. Our proposed feature fusion strategy also significantly improves performance for skill score prediction, achieving an average Spearman correlation coefficient of up to 0.61. Conclusions: Holistic features capturing global information from robot kinematic data can successfully be used for evaluating surgeon skill in basic surgical tasks on the da Vinci robot. The presented framework could potentially allow for real-time score feedback in RMIS training.

Submitted 22 December, 2017; originally announced December 2017.
Comments: Accepted at IPCAI 2018
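Of the four holistic features listed, Approximate Entropy (ApEn) is the least self-explanatory. The NumPy sketch below implements the standard ApEn definition (not the authors' code) to show what "regularity of fluctuations" means for a kinematic time series:

    import numpy as np

    def approximate_entropy(x, m=2, r=None):
        """Standard ApEn(m, r) of a 1-D series: low values mean regular, predictable motion."""
        x = np.asarray(x, dtype=float)
        n = len(x)
        if r is None:
            r = 0.2 * x.std()  # a common default tolerance

        def phi(m):
            # Embed the series into overlapping windows of length m.
            windows = np.array([x[i:i + m] for i in range(n - m + 1)])
            # Chebyshev distance between every pair of windows.
            dist = np.max(np.abs(windows[:, None, :] - windows[None, :, :]), axis=-1)
            counts = (dist <= r).mean(axis=1)  # fraction of similar windows (self-matches included)
            return np.mean(np.log(counts))

        return phi(m) - phi(m + 1)

    rng = np.random.default_rng(0)
    print(approximate_entropy(np.sin(np.linspace(0, 8 * np.pi, 300))))  # regular -> small
    print(approximate_entropy(rng.standard_normal(300)))                # irregular -> larger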
arXiv:1702.07772 (https://arxiv.org/abs/1702.07772) [pdf, other]
Categories: cs.CV

Video and Accelerometer-Based Motion Analysis for Automated Surgical Skills Assessment

Authors: Aneeq Zia, Yachna Sharma, Vinay Bettadapura, Eric L. Sarin, Irfan Essa

Abstract: Purpose: Basic surgical skills of suturing and knot tying are an essential part of medical training. Having an automated system for surgical skills assessment could help save experts' time and improve training efficiency. There have been some recent attempts at automated surgical skills assessment using either video analysis or acceleration data. In this paper, we present a novel approach for automated assessment of OSATS-based surgical skills and provide an analysis of different features on multi-modal data (video and accelerometer data). Methods: We conduct the largest study, to the best of our knowledge, for basic surgical skills assessment on a dataset that contains video and accelerometer data for suturing and knot-tying tasks. We introduce "entropy-based" features - Approximate Entropy (ApEn) and Cross-Approximate Entropy (XApEn) - which quantify the amount of predictability and regularity of fluctuations in time-series data. The proposed features are compared to existing methods of Sequential Motion Texture (SMT), Discrete Cosine Transform (DCT), and Discrete Fourier Transform (DFT) for surgical skills assessment. Results: We report the average performance of different features across all applicable OSATS criteria for suturing and knot-tying tasks. Our analysis shows that the proposed entropy-based features outperform previous state-of-the-art methods using video data. For accelerometer data, our method performs better for suturing only. We also show that fusion of video and acceleration features can improve overall performance, with the proposed entropy features achieving the highest accuracy. Conclusions: Automated surgical skills assessment can be achieved with high accuracy using the proposed entropy features. Such a system can significantly improve the efficiency of surgical training in medical schools and teaching hospitals.

Submitted 24 February, 2017; originally announced February 2017.
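DCT and DFT features also appear in this comparison. Purely as a generic illustration (not the authors' pipeline), a compact frequency-domain descriptor of a motion signal can be formed by keeping its first few DCT coefficients:

    import numpy as np
    from scipy.fft import dct

    def dct_features(signal, k=10):
        """First k DCT-II coefficients of a 1-D motion signal, a compact frequency descriptor."""
        return dct(np.asarray(signal, dtype=float), norm="ortho")[:k]

    accel = np.cumsum(np.random.randn(500))  # toy accelerometer-like trace
    print(dct_features(accel).shape)         # (10,)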
arXiv:0811.0777 (https://arxiv.org/abs/0811.0777)
Categories: cs.IT

A random coding theorem for "modulo-two adder" source network

Authors: Amin Zia

Abstract: This paper has been withdrawn by the author due to a crucial error in the proof of the main theorem (Sec. 3). In particular, in deriving the bound on the probability of error (Eq. 10), the contribution of those pairs (x', y') that are not equal to (x, y) was not considered. By adding the contribution of these pairs, one can verify that a region of rates similar to the Slepian-Wolf region will emerge. The author would like to acknowledge a critical review of the paper by Mr. Paul Cuff of Stanford University, who first pointed out the error.

Submitted 10 January, 2009; v1 submitted 5 November, 2008; originally announced November 2008.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been withdrawn</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 