Search | arXiv e-print repository
Showing 1–16 of 16 results for author: Thapa, R

Searching in archive cs. Results are sorted by announcement date (newest first), 50 per page.
1. arXiv:2502.11271 [pdf, other]
Subjects: cs.LG (Machine Learning); cs.CL (Computation and Language); cs.CV (Computer Vision and Pattern Recognition); cs.MA (Multiagent Systems)
Title: OctoTools: An Agentic Framework with Extensible Tools for Complex Reasoning
Authors: Pan Lu, Bowen Chen, Sheng Liu, Rahul Thapa, Joseph Boen, James Zou
Abstract: Solving complex reasoning tasks may involve visual understanding, domain knowledge retrieval, numerical calculation, and multi-step reasoning. Existing methods augment large language models (LLMs) with external tools but are restricted to specialized domains or limited tool types, or require additional training data. In this paper, we introduce OctoTools, a training-free, user-friendly, and easily extensible open-source agentic framework designed to tackle complex reasoning across diverse domains. OctoTools introduces standardized tool cards to encapsulate tool functionality, a planner for both high-level and low-level planning, and an executor to carry out tool usage. We validate OctoTools' generality across 16 diverse tasks (including MathVista, MMLU-Pro, MedQA, and GAIA-Text), achieving substantial average accuracy gains of 9.3% over GPT-4o. Furthermore, OctoTools outperforms AutoGen, GPT-Functions, and LangChain by up to 10.6% when given the same set of tools. Through comprehensive analysis and ablations, OctoTools demonstrates advantages in task planning, effective tool usage, and multi-step problem solving.
Submitted 16 February, 2025; originally announced February 2025.
Comments: 89 pages, 18 figures. Project website: https://octotools.github.io/
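The tool-card abstraction described in this abstract is easy to picture in code. Below is a minimal, hypothetical sketch of the idea: standardized metadata wrapped around a callable tool, chosen by a planner and run by an executor. None of these names come from the OctoTools codebase, and the keyword-matching "planner" is a stand-in for what would be an LLM call over the cards' descriptions.

```python
# Hypothetical sketch of the tool-card idea: each tool is wrapped in a
# standardized "card" (metadata plus callable), and a planner-executor
# loop selects and runs tools. Not the OctoTools API.
from dataclasses import dataclass
from typing import Callable, Dict

@dataclass
class ToolCard:
    name: str
    description: str           # used by the planner to select tools
    run: Callable[[str], str]  # the encapsulated tool functionality

def calculator(query: str) -> str:
    return str(eval(query, {"__builtins__": {}}))  # toy numeric tool

REGISTRY: Dict[str, ToolCard] = {
    "calculator": ToolCard("calculator", "evaluates arithmetic expressions", calculator),
}

def plan(task: str) -> str:
    # Stand-in for the LLM planner: pick a tool whose description fits
    # the task. A real planner would prompt an LLM over REGISTRY here.
    return "calculator" if any(c.isdigit() for c in task) else "none"

def execute(task: str) -> str:
    tool = REGISTRY.get(plan(task))
    return tool.run(task) if tool else "no tool selected"

print(execute("3 * (4 + 5)"))  # -> 27
```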
2. arXiv:2501.03675 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: SMIR: Efficient Synthetic Data Pipeline To Improve Multi-Image Reasoning
Authors: Andrew Li, Rahul Thapa, Rahul Chalamala, Qingyang Wu, Kezhen Chen, James Zou
Abstract: Vision-Language Models (VLMs) excel at understanding single images, aided by high-quality instruction datasets. However, multi-image reasoning remains underexplored in the open-source community due to two key challenges: (1) scaling datasets with correlated images and complex reasoning instructions is resource-intensive, and (2) robust evaluation benchmarks for multi-image tasks are lacking. To address this, we introduce SMiR, a synthetic data-generation pipeline for multi-image reasoning, along with a high-quality dataset generated using this pipeline. SMiR efficiently extracts correlated images via multimodal embeddings, integrates visual and descriptive information, and leverages open-source LLMs to generate quality instructions. Using this approach, we produce 160K synthetic training samples, offering a cost-effective alternative to closed-source solutions. Additionally, we present SMiR-Bench, a multi-image reasoning benchmark comprising 200 diverse examples across seven complex reasoning tasks. SMiR-Bench is multi-turn and employs a VLM judge to evaluate free-form responses, providing a comprehensive assessment of model expressiveness and reasoning capability across modalities. We demonstrate the effectiveness of SMiR by fine-tuning open-source VLMs and evaluating them on SMiR-Bench.
Submitted 14 February, 2025; v1 submitted 7 January, 2025; originally announced January 2025.
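As a rough illustration of the "extracts correlated images via multimodal embeddings" step, the sketch below greedily groups images whose embedding cosine similarity clears a threshold. The embeddings, the threshold, and the greedy grouping are all assumptions made for illustration; the paper's actual selection logic may differ.

```python
# Illustrative grouping of "correlated" images by cosine similarity of
# precomputed multimodal embeddings (e.g., from a CLIP-style encoder).
import numpy as np

def group_correlated(embeddings: np.ndarray, threshold: float = 0.8):
    """Greedily group items whose embeddings are pairwise similar."""
    normed = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
    sims = normed @ normed.T
    unused, groups = set(range(len(embeddings))), []
    while unused:
        seed = unused.pop()
        group = [seed] + [j for j in list(unused) if sims[seed, j] >= threshold]
        unused -= set(group)
        groups.append(group)
    return groups

rng = np.random.default_rng(0)
fake = rng.normal(size=(6, 512))   # 6 hypothetical image embeddings
print(group_correlated(fake, threshold=0.1))
```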
3. arXiv:2406.00977 [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Title: Dragonfly: Multi-Resolution Zoom-In Encoding Enhances Vision-Language Models
Authors: Rahul Thapa, Kezhen Chen, Ian Covert, Rahul Chalamala, Ben Athiwaratkun, Shuaiwen Leon Song, James Zou
Abstract: Recent advances in vision-language models (VLMs) have demonstrated the advantages of processing images at higher resolutions and utilizing multi-crop features to preserve native resolution details. However, despite these improvements, existing vision transformers (ViTs) still struggle to capture fine-grained details from less prominent objects, charts, and embedded text, limiting their effectiveness in certain tasks. In this paper, we extend recent high-resolution and multi-crop techniques by not only preserving the native resolution, but zooming in beyond it and extracting features from a large number of image sub-crops. This enhancement allows our model to better capture fine-grained details, overcoming the limitations of current ViTs. To manage the increased token count and computational complexity, we demonstrate that a simple mean-pooling aggregation over tokens is effective. Our model, Dragonfly, achieves competitive performance on general-domain tasks such as ScienceQA and AI2D, and excels in tasks requiring fine-grained image understanding, including TextVQA and ChartQA. Among models in the 7-8B parameter range, Dragonfly consistently ranks at the top across ten general-domain benchmarks, achieving the highest or second-highest scores in most cases, outperforming models that are significantly larger or trained on larger datasets. Our biomedical model, Dragonfly-Med, sets new benchmarks on several medical tasks, achieving 91.6% accuracy on SLAKE (compared to 84.8% for Med-Gemini), a 67.1% token F1 score on Path-VQA (compared to 62.7% for Med-PaLM M), and state-of-the-art results across the majority of image captioning tasks. Overall, our work highlights the persistent challenge of engineering visual representations with fixed-resolution ViTs, and proposes a simple yet effective solution to address this issue and boost performance in both general and specialized domains.
Submitted 14 October, 2024; v1 submitted 3 June, 2024; originally announced June 2024.
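The token-management trick named in this abstract, mean-pooling aggregation over sub-crop tokens, is simple enough to show directly. The shapes below are invented for illustration; only the pooling step reflects the abstract's description.

```python
# Sketch of the token-reduction idea: features come from many image
# sub-crops, and mean-pooling within each crop keeps the total token
# count manageable. Shapes are assumptions, not Dragonfly's actual config.
import numpy as np

n_crops, tokens_per_crop, dim = 36, 576, 1024   # hypothetical ViT outputs
crop_tokens = np.random.randn(n_crops, tokens_per_crop, dim)

# Mean-pool within each crop: (36, 576, 1024) -> (36, 1024), so the
# language model sees one summary vector per sub-crop instead of 20k+ tokens.
pooled = crop_tokens.mean(axis=1)
print(pooled.shape)  # (36, 1024)
```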
4. arXiv:2405.17766 [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); eess.SP (Signal Processing)
Title: SleepFM: Multi-modal Representation Learning for Sleep Across Brain Activity, ECG and Respiratory Signals
Authors: Rahul Thapa, Bryan He, Magnus Ruud Kjaer, Hyatt Moore, Gauri Ganjoo, Emmanuel Mignot, James Zou
Abstract: Sleep is a complex physiological process evaluated through various modalities recording electrical brain, cardiac, and respiratory activities. We curate a large polysomnography dataset from over 14,000 participants comprising over 100,000 hours of multi-modal sleep recordings. Leveraging this extensive dataset, we developed SleepFM, the first multi-modal foundation model for sleep analysis. We show that a novel leave-one-out approach for contrastive learning significantly improves downstream task performance compared to representations from standard pairwise contrastive learning. A logistic regression model trained on SleepFM's learned embeddings outperforms an end-to-end trained convolutional neural network (CNN) on sleep stage classification (macro AUROC 0.88 vs 0.72 and macro AUPRC 0.72 vs 0.48) and sleep disordered breathing detection (AUROC 0.85 vs 0.69 and AUPRC 0.77 vs 0.61). Notably, the learned embeddings achieve 48% top-1 average accuracy in retrieving the corresponding recording clips of other modalities from 90,000 candidates. This work demonstrates the value of holistic multi-modal sleep modeling to fully capture the richness of sleep recordings. SleepFM is open source and available at https://github.com/rthapa84/sleepfm-codebase.
Submitted 27 May, 2024; originally announced May 2024.
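One plausible reading of the "leave-one-out approach for contrastive learning" is sketched below: each modality's clip embedding is contrasted against the average embedding of the remaining modalities, rather than against each other modality pairwise. This is an illustrative interpretation of the abstract, not the paper's exact loss.

```python
# Hedged sketch of a leave-one-out contrastive loss over three sleep
# modalities (brain activity, ECG, respiration). Matching clips sit on
# the diagonal of the similarity matrix, as in InfoNCE/CLIP-style losses.
import torch
import torch.nn.functional as F

def leave_one_out_loss(mods, temp: float = 0.1):
    """mods: list of (batch, dim) embeddings, one tensor per modality."""
    mods = [F.normalize(m, dim=-1) for m in mods]
    total = 0.0
    for i, anchor in enumerate(mods):
        others = torch.stack([m for j, m in enumerate(mods) if j != i])
        target = F.normalize(others.mean(dim=0), dim=-1)  # leave-one-out average
        logits = anchor @ target.T / temp                 # (batch, batch)
        labels = torch.arange(anchor.size(0))             # matching clips on diagonal
        total = total + F.cross_entropy(logits, labels)
    return total / len(mods)

bas, ecg, resp = (torch.randn(8, 128) for _ in range(3))
print(leave_one_out_loss([bas, ecg, resp]).item())
```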
5. arXiv:2403.07911 [pdf]
Subjects: cs.CY (Computers and Society); cs.AI (Artificial Intelligence)
Title: Standing on FURM ground -- A framework for evaluating Fair, Useful, and Reliable AI Models in healthcare systems
Authors: Alison Callahan, Duncan McElfresh, Juan M. Banda, Gabrielle Bunney, Danton Char, Jonathan Chen, Conor K. Corbin, Debadutta Dash, Norman L. Downing, Sneha S. Jain, Nikesh Kotecha, Jonathan Masterson, Michelle M. Mello, Keith Morse, Srikar Nallan, Abby Pandya, Anurang Revri, Aditya Sharma, Christopher Sharp, Rahul Thapa, Michael Wornow, Alaa Youssef, Michael A. Pfeffer, Nigam H. Shah
Abstract: The impact of using artificial intelligence (AI) to guide patient care or operational processes is an interplay of the AI model's output, the decision-making protocol based on that output, and the capacity of the stakeholders involved to take the necessary subsequent action. Estimating the effects of this interplay before deployment, and studying it in real time afterwards, are essential to bridge the chasm between AI model development and achievable benefit. To accomplish this, the Data Science team at Stanford Health Care has developed a Testing and Evaluation (T&E) mechanism to identify fair, useful and reliable AI models (FURM) by conducting an ethical review to identify potential value mismatches, simulations to estimate usefulness, financial projections to assess sustainability, as well as analyses to determine IT feasibility, design a deployment strategy, and recommend a prospective monitoring and evaluation plan. We report on FURM assessments done to evaluate six AI guided solutions for potential adoption, spanning clinical and operational settings, each with the potential to impact from several dozen to tens of thousands of patients each year. We describe the assessment process, summarize the six assessments, and share our framework to enable others to conduct similar assessments. Of the six solutions we assessed, two have moved into a planning and implementation phase. Our novel contributions - usefulness estimates by simulation, financial projections to quantify sustainability, and a process to do ethical assessments - as well as their underlying methods and open source tools, are available for other healthcare systems to conduct actionable evaluations of candidate AI solutions.
Submitted 14 March, 2024; v1 submitted 26 February, 2024; originally announced March 2024.
6. arXiv:2402.19371 [pdf]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.IR (Information Retrieval)
Title: OpenMedLM: Prompt engineering can out-perform fine-tuning in medical question-answering with open-source large language models
Authors: Jenish Maharjan, Anurag Garikipati, Navan Preet Singh, Leo Cyrus, Mayank Sharma, Madalina Ciobanu, Gina Barnes, Rahul Thapa, Qingqing Mao, Ritankar Das
Abstract: LLMs have become increasingly capable at accomplishing a range of specialized tasks and can be utilized to expand equitable access to medical knowledge. Most medical LLMs have involved extensive fine-tuning, leveraging specialized medical data and significant, thus costly, amounts of computational power. Many of the top-performing LLMs are proprietary and their access is limited to very few research groups. However, open-source (OS) models represent a key area of growth for medical LLMs due to significant improvements in performance and an inherent ability to provide the transparency and compliance required in healthcare. We present OpenMedLM, a prompting platform which delivers state-of-the-art (SOTA) performance for OS LLMs on medical benchmarks. We evaluated a range of OS foundation LLMs (7B-70B) on four medical benchmarks (MedQA, MedMCQA, PubMedQA, MMLU medical-subset). We employed a series of prompting strategies, including zero-shot, few-shot, chain-of-thought (random selection and kNN selection), and ensemble/self-consistency voting. We found that OpenMedLM delivers OS SOTA results on three common medical LLM benchmarks, surpassing the previous best-performing OS models that leveraged computationally costly extensive fine-tuning. The model delivers a 72.6% accuracy on the MedQA benchmark, outperforming the previous SOTA by 2.4%, and achieves 81.7% accuracy on the MMLU medical-subset, establishing itself as the first OS LLM to surpass 80% accuracy on this benchmark. Our results highlight medical-specific emergent properties in OS LLMs that have not yet been documented elsewhere, and showcase the benefits of further leveraging prompt engineering to improve the performance of accessible LLMs for medical applications.
Submitted 29 February, 2024; originally announced February 2024.
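Of the prompting strategies listed, ensemble/self-consistency voting is the most mechanical: sample several completions and keep the majority answer. The sketch below shows only that voting skeleton; `generate` is a hypothetical stand-in for a sampled LLM call, not part of OpenMedLM.

```python
# Self-consistency voting: sample several chain-of-thought completions
# and take the majority final answer.
from collections import Counter
import random

def generate(question: str) -> str:
    # Placeholder for one sampled LLM completion's final answer letter.
    return random.choice(["A", "A", "A", "B", "C"])  # toy answer distribution

def self_consistency(question: str, n_samples: int = 11) -> str:
    votes = Counter(generate(question) for _ in range(n_samples))
    return votes.most_common(1)[0][0]  # majority-voted answer

print(self_consistency("Which drug is first-line for ...?"))
```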
7. arXiv:2308.14089 [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: MedAlign: A Clinician-Generated Dataset for Instruction Following with Electronic Medical Records
Authors: Scott L. Fleming, Alejandro Lozano, William J. Haberkorn, Jenelle A. Jindal, Eduardo P. Reis, Rahul Thapa, Louis Blankemeier, Julian Z. Genkins, Ethan Steinberg, Ashwin Nayak, Birju S. Patel, Chia-Chun Chiang, Alison Callahan, Zepeng Huo, Sergios Gatidis, Scott J. Adams, Oluseyi Fayanju, Shreya J. Shah, Thomas Savage, Ethan Goh, Akshay S. Chaudhari, Nima Aghaeepour, Christopher Sharp, Michael A. Pfeffer, Percy Liang, et al. (5 additional authors not shown)
Abstract: The ability of large language models (LLMs) to follow natural language instructions with human-level fluency suggests many opportunities in healthcare to reduce administrative burden and improve quality of care. However, evaluating LLMs on realistic text generation tasks for healthcare remains challenging. Existing question answering datasets for electronic health record (EHR) data fail to capture the complexity of information needs and documentation burdens experienced by clinicians. To address these challenges, we introduce MedAlign, a benchmark dataset of 983 natural language instructions for EHR data. MedAlign is curated by 15 clinicians (7 specialities), includes clinician-written reference responses for 303 instructions, and provides 276 longitudinal EHRs for grounding instruction-response pairs. We used MedAlign to evaluate 6 general domain LLMs, having clinicians rank the accuracy and quality of each LLM response. We found high error rates, ranging from 35% (GPT-4) to 68% (MPT-7B-Instruct), and an 8.3% drop in accuracy moving from 32k to 2k context lengths for GPT-4. Finally, we report correlations between clinician rankings and automated natural language generation metrics as a way to rank LLMs without human review. We make MedAlign available under a research data use agreement to enable LLM evaluations on tasks aligned with clinician needs and preferences.
Submitted 24 December, 2023; v1 submitted 27 August, 2023; originally announced August 2023.
8. arXiv:2307.02028 [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
Title: EHRSHOT: An EHR Benchmark for Few-Shot Evaluation of Foundation Models
Authors: Michael Wornow, Rahul Thapa, Ethan Steinberg, Jason A. Fries, Nigam H. Shah
Abstract: While the general machine learning (ML) community has benefited from public datasets, tasks, and models, the progress of ML in healthcare has been hampered by a lack of such shared assets. The success of foundation models creates new challenges for healthcare ML by requiring access to shared pretrained models to validate performance benefits. We help address these challenges through three contributions. First, we publish a new dataset, EHRSHOT, which contains deidentified structured data from the electronic health records (EHRs) of 6,739 patients from Stanford Medicine. Unlike MIMIC-III/IV and other popular EHR datasets, EHRSHOT is longitudinal and not restricted to ICU/ED patients. Second, we publish the weights of CLMBR-T-base, a 141M parameter clinical foundation model pretrained on the structured EHR data of 2.57M patients. We are one of the first to fully release such a model for coded EHR data; in contrast, most prior models released for clinical data (e.g. GatorTron, ClinicalBERT) only work with unstructured text and cannot process the rich, structured data within an EHR. We provide an end-to-end pipeline for the community to validate and build upon its performance. Third, we define 15 few-shot clinical prediction tasks, enabling evaluation of foundation models on benefits such as sample efficiency and task adaptation. Our model and dataset are available via a research data use agreement from our website: https://ehrshot.stanford.edu. Code to reproduce our results is available at our GitHub repo: https://github.com/som-shahlab/ehrshot-benchmark
Submitted 11 December, 2023; v1 submitted 5 July, 2023; originally announced July 2023.
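The few-shot protocol implied by the abstract, a lightweight head trained on k labeled examples of frozen foundation-model representations, can be sketched as follows. The synthetic embeddings and task below are stand-ins; this is not the EHRSHOT evaluation harness itself.

```python
# Illustrative few-shot evaluation: fit a logistic head on k labeled
# patient representations from a frozen pretrained model, report AUROC.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 128))               # stand-in patient embeddings
y = (X[:, 0] + 0.5 * rng.normal(size=1000) > 0).astype(int)

train_pos = np.where(y[:500] == 1)[0]          # draw class-balanced few-shot sets
train_neg = np.where(y[:500] == 0)[0]
X_test, y_test = X[500:], y[500:]

for k in (8, 32, 128):                         # few-shot training sizes
    idx = np.concatenate([train_pos[: k // 2], train_neg[: k // 2]])
    clf = LogisticRegression(max_iter=1000).fit(X[idx], y[idx])
    auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    print(f"k={k:4d}  AUROC={auc:.3f}")
```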
9. arXiv:2304.13714 [pdf]
Subjects: cs.AI (Artificial Intelligence); cs.CL (Computation and Language); cs.IR (Information Retrieval)
Title: Evaluation of GPT-3.5 and GPT-4 for supporting real-world information needs in healthcare delivery
Authors: Debadutta Dash, Rahul Thapa, Juan M. Banda, Akshay Swaminathan, Morgan Cheatham, Mehr Kashyap, Nikesh Kotecha, Jonathan H. Chen, Saurabh Gombar, Lance Downing, Rachel Pedreira, Ethan Goh, Angel Arnaout, Garret Kenn Morris, Honor Magon, Matthew P Lungren, Eric Horvitz, Nigam H. Shah
Abstract: Despite growing interest in using large language models (LLMs) in healthcare, current explorations do not assess the real-world utility and safety of LLMs in clinical settings. Our objective was to determine whether two LLMs can serve information needs submitted by physicians as questions to an informatics consultation service in a safe and concordant manner. Sixty-six questions from an informatics consult service were submitted to GPT-3.5 and GPT-4 via simple prompts. Twelve physicians assessed the LLM responses' possibility of patient harm and concordance with existing reports from an informatics consultation service. Physician assessments were summarized based on majority vote. For no question did a majority of physicians deem either LLM response harmful. For GPT-3.5, responses to 8 questions were concordant with the informatics consult report, 20 discordant, and 9 unable to be assessed. There were 29 responses with no majority on "Agree", "Disagree", and "Unable to assess". For GPT-4, responses to 13 questions were concordant, 15 discordant, and 3 unable to be assessed. There were 35 responses with no majority. Responses from both LLMs were largely devoid of overt harm, but less than 20% of the responses agreed with an answer from an informatics consultation service, responses contained hallucinated references, and physicians were divided on what constitutes harm. These results suggest that while general purpose LLMs are able to provide safe and credible responses, they often do not meet the specific information need of a given question. A definitive evaluation of the usefulness of LLMs in healthcare settings will likely require additional research on prompt engineering, calibration, and custom-tailoring of general purpose models.
Submitted 30 April, 2023; v1 submitted 26 April, 2023; originally announced April 2023.
Comments: 27 pages including supplemental information
10. arXiv:2303.12961 [pdf]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Title: The Shaky Foundations of Clinical Foundation Models: A Survey of Large Language Models and Foundation Models for EMRs
Authors: Michael Wornow, Yizhe Xu, Rahul Thapa, Birju Patel, Ethan Steinberg, Scott Fleming, Michael A. Pfeffer, Jason Fries, Nigam H. Shah
Abstract: The successes of foundation models such as ChatGPT and AlphaFold have spurred significant interest in building similar models for electronic medical records (EMRs) to improve patient care and hospital operations. However, recent hype has obscured critical gaps in our understanding of these models' capabilities. We review over 80 foundation models trained on non-imaging EMR data (i.e. clinical text and/or structured data) and create a taxonomy delineating their architectures, training data, and potential use cases. We find that most models are trained on small, narrowly-scoped clinical datasets (e.g. MIMIC-III) or broad, public biomedical corpora (e.g. PubMed) and are evaluated on tasks that do not provide meaningful insights on their usefulness to health systems. In light of these findings, we propose an improved evaluation framework for measuring the benefits of clinical foundation models that is more closely grounded in metrics that matter in healthcare.
Submitted 24 March, 2023; v1 submitted 22 March, 2023; originally announced March 2023.
Comments: Reformatted figures, updated contributions

arXiv:2303.06269 [pdf, other] (https://arxiv.org/abs/2303.06269)
Subjects: cs.LG (Machine Learning)
Title: DEPLOYR: A technical framework for deploying custom real-time machine learning models into the electronic medical record
Authors: Conor K. Corbin, Rob Maclay, Aakash Acharya, Sreedevi Mony, Soumya Punnathanam, Rahul Thapa, Nikesh Kotecha, Nigam H. Shah, Jonathan H. Chen
Abstract: Machine learning (ML) applications in healthcare are extensively researched, but successful translations to the bedside are scant. Healthcare institutions are establishing frameworks to govern and promote the implementation of accurate, actionable and reliable models that integrate with clinical workflow. Such governance frameworks require an accompanying technical framework to deploy models in a resource-efficient manner. Here we present DEPLOYR, a technical framework for enabling real-time deployment and monitoring of researcher-created clinical ML models in a widely used electronic medical record (EMR) system. We discuss core functionality and design decisions, including mechanisms that trigger inference based on actions within EMR software, modules that collect real-time data to make inferences, mechanisms that close the loop by displaying inferences back to end users within their workflow, monitoring modules that track the performance of deployed models over time, silent-deployment capabilities, and mechanisms to prospectively evaluate a deployed model's impact. We demonstrate the use of DEPLOYR by silently deploying and prospectively evaluating twelve ML models triggered by clinician button-clicks in Stanford Health Care's production instance of Epic. Our study highlights the need for and feasibility of such silent deployment, because prospectively measured performance varies from retrospective estimates. By describing DEPLOYR, we aim to inform ML deployment best practices and help bridge the model-implementation gap.
Submitted 10 March, 2023; originally announced March 2023.
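
The pipeline the abstract enumerates (trigger, real-time feature collection, optional display back to the clinician, logging for monitoring) can be illustrated with a minimal sketch. The helper names below (fetch_features, show_inference, log_prediction) and the model interface are hypothetical stand-ins, not DEPLOYR's actual interfaces:

    # Minimal sketch of the trigger -> infer -> (optionally display) -> monitor
    # pattern described in the abstract. All helper names are hypothetical.
    from dataclasses import dataclass
    import time

    @dataclass
    class Deployment:
        model: object          # any fitted model exposing predict_proba
        silent: bool = True    # silent deployments score patients but hide output

    def on_button_click(deployment, patient_id,
                        fetch_features, show_inference, log_prediction):
        """Called when the EMR fires a trigger event (e.g., a clinician button-click)."""
        features = fetch_features(patient_id)           # real-time data collection
        score = deployment.model.predict_proba([features])[0][1]
        if not deployment.silent:
            show_inference(patient_id, score)           # close the loop in the workflow
        log_prediction(patient_id, score, time.time())  # monitor against later outcomes

Running in silent mode first lets prospectively measured performance be compared with retrospective estimates before any inference is ever shown to clinicians, which is the gap the abstract highlights.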

arXiv:2211.03187 [pdf] (https://arxiv.org/abs/2211.03187)
Subjects: stat.ML (Machine Learning); cs.LG (Machine Learning)
DOI: 10.1177/03611981221076120 (https://doi.org/10.1177/03611981221076120)
Title: Applying Association Rules Mining to Investigate Pedestrian Fatal and Injury Crash Patterns Under Different Lighting Conditions
Authors: Ahmed Hossain, Xiaoduan Sun, Raju Thapa, Julius Codjoe
Abstract: The pattern of pedestrian crashes varies greatly depending on lighting circumstances, emphasizing the need to examine pedestrian crashes under various lighting conditions. Using Louisiana pedestrian fatal and injury crash data (2010-2019), this study applied Association Rules Mining (ARM) to identify hidden patterns of crash risk factors across three lighting conditions (daylight, dark with streetlight, and dark with no streetlight). Based on the generated rules, the results show that daylight pedestrian crashes are associated with children (under 15 years), senior pedestrians (over 64 years), older drivers (over 64 years), and driving behaviors such as failure to yield, inattention/distraction, and illness/fatigue/falling asleep. Additionally, young drivers (15-24 years) are involved in severe pedestrian crashes in daylight conditions. The study also found pedestrian alcohol/drug involvement to be the most frequent item in the dark-with-streetlight condition; this crash type is particularly associated with pedestrian action (crossing at an intersection/midblock), driver age (55-64 years), speed limit (30-35 mph), and area type (business with mixed residential). Fatal pedestrian crashes are found to be associated with high-speed roadways (over 50 mph) in the dark-with-no-streetlight condition. Other risk factors linked with these high-speed-limit crashes include pedestrians walking with or against traffic, dark pedestrian clothing, and pedestrian alcohol/drug involvement. The findings are expected to improve understanding of the relationships between pedestrian crash risk factors and specific lighting conditions, and highway safety experts can use them to select effective countermeasures to reduce pedestrian crashes strategically.
Submitted 6 November, 2022; originally announced November 2022.
Journal ref: SAGE Journals (Volume 2676, Issue 6, 2022)
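
The ARM workflow the abstract describes (one-hot crash attributes, frequent itemsets, then rules ranked by support and confidence) can be sketched with the mlxtend library. The toy records below are invented for illustration; they are not the Louisiana data:

    # Sketch of the ARM workflow on invented toy crash records.
    import pandas as pd
    from mlxtend.frequent_patterns import apriori, association_rules

    records = [
        {"daylight", "driver_failed_to_yield", "child_pedestrian"},
        {"dark_streetlight", "pedestrian_alcohol", "speed_30_35"},
        {"dark_no_streetlight", "speed_over_50", "dark_clothing"},
        {"dark_streetlight", "pedestrian_alcohol", "crossing_midblock"},
    ]
    items = sorted(set().union(*records))
    # One-hot boolean table: one row per crash, one column per attribute.
    df = pd.DataFrame([{i: (i in r) for i in items} for r in records])

    itemsets = apriori(df, min_support=0.25, use_colnames=True)  # frequent itemsets
    rules = association_rules(itemsets, metric="confidence", min_threshold=0.6)
    print(rules[["antecedents", "consequents", "support", "confidence", "lift"]])

On this toy table the co-occurrence of dark_streetlight and pedestrian_alcohol surfaces as a high-confidence rule, mirroring the kind of pattern the study reports for the dark-with-streetlight condition.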

arXiv:2106.02894 [pdf, other] (https://arxiv.org/abs/2106.02894)
Subjects: cs.NE (Neural and Evolutionary Computing)
Title: MoleHD: Ultra-Low-Cost Drug Discovery using Hyperdimensional Computing
Authors: Dongning Ma, Rahul Thapa, Xun Jiao
Abstract: Modern drug discovery is often time-consuming, complex and cost-ineffective due to the large volume of molecular data and complicated molecular properties. Recently, machine learning algorithms have shown promising results in the virtual screening stage of automated drug discovery by predicting molecular properties. While emerging learning methods such as graph neural networks and recurrent neural networks exhibit high accuracy, they are also notoriously computation- and memory-intensive, relying on operations such as feature embeddings and deep convolutions. In this paper, we propose a viable alternative to existing learning methods: MoleHD, a method based on brain-inspired hyperdimensional computing (HDC) for molecular property prediction. We develop HDC encoders that project the SMILES representation of a molecule into high-dimensional vectors used for HDC training and inference. We perform an extensive evaluation using 29 classification tasks from 3 widely used molecule datasets (Clintox, BBBP, SIDER) under three split methods (random, scaffold, and stratified). In a comprehensive comparison with 8 existing learning models, including state-of-the-art graph and recurrent neural networks, we show that MoleHD achieves the highest average ROC-AUC score on the random and scaffold splits across the 3 datasets and the second-highest on the stratified split. Importantly, MoleHD achieves this performance with significantly reduced computing cost and training effort. To the best of our knowledge, this is the first HDC-based method for drug discovery. The promising results presented in this paper can potentially lead to a novel path in drug discovery research.
Submitted 5 February, 2022; v1 submitted 5 June, 2021; originally announced June 2021.
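
The encoding step the abstract outlines, projecting a SMILES string into a high-dimensional vector, can be sketched as follows. Per-character tokens, shift-based position binding, and majority-sign bundling are assumptions of this sketch, not necessarily MoleHD's actual encoder:

    # Minimal HDC-style SMILES encoder sketch (assumptions: per-character tokens,
    # position encoded by circular shift, bundling by majority sign).
    import numpy as np

    D = 10_000                       # hypervector dimensionality
    rng = np.random.default_rng(0)
    token_hvs = {}                   # lazily drawn random bipolar hypervectors

    def token_hv(tok):
        if tok not in token_hvs:
            token_hvs[tok] = rng.choice([-1, 1], size=D)
        return token_hvs[tok]

    def encode_smiles(smiles):
        """Bundle position-bound token hypervectors into one molecule hypervector."""
        acc = np.zeros(D)
        for pos, tok in enumerate(smiles):
            acc += np.roll(token_hv(tok), pos)  # bind token with its position
        return np.sign(acc)                      # bipolar molecule hypervector

    hv = encode_smiles("CC(=O)Oc1ccccc1C(=O)O")  # aspirin, for illustration
    print(hv[:10])

Training then reduces to bundling the hypervectors of each class into a prototype, and inference to a nearest-prototype lookup, which is where HDC's low compute and memory cost comes from.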

arXiv:2105.12770 [pdf, other] (https://arxiv.org/abs/2105.12770)
Subjects: cs.NE (Neural and Evolutionary Computing); cs.LG (Machine Learning)
DOI: 10.1109/ISVLSI51109.2021.00027 (https://doi.org/10.1109/ISVLSI51109.2021.00027)
Title: HDXplore: Automated Blackbox Testing of Brain-Inspired Hyperdimensional Computing
Authors: Rahul Thapa, Dongning Ma, Xun Jiao
Abstract: Inspired by the way the human brain works, hyperdimensional computing (HDC) is attracting growing attention. HDC is an emerging computing scheme, based on the working mechanism of the brain, that computes with deep and abstract patterns of neural activity instead of actual numbers. Compared with traditional ML algorithms such as DNNs, HDC is more memory-centric, granting it advantages such as relatively smaller model size, lower computation cost, and one-shot learning, making it a promising candidate for low-cost computing platforms. However, the robustness of HDC models has not been systematically studied. In this paper, we systematically expose the unexpected or incorrect behaviors of HDC models by developing HDXplore, a blackbox differential-testing framework. We leverage multiple HDC models with similar functionality as cross-referencing oracles, avoiding manual checking or labeling of the original inputs. We also propose different perturbation mechanisms in HDXplore. HDXplore automatically finds thousands of incorrect corner-case behaviors of an HDC model. We propose two retraining mechanisms; using the corner cases generated by HDXplore to retrain the HDC model improves model accuracy by up to 9%.
Submitted 26 May, 2021; originally announced May 2021.

arXiv:2004.11958 [pdf] (https://arxiv.org/abs/2004.11958)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.LG (Machine Learning); eess.IV (Image and Video Processing)
Title: The Plant Pathology 2020 challenge dataset to classify foliar disease of apples
Authors: Ranjita Thapa, Noah Snavely, Serge Belongie, Awais Khan
Abstract: Apple orchards in the U.S. are under constant threat from a large number of pathogens and insects. Appropriate and timely deployment of disease management depends on early disease detection. Incorrect and delayed diagnosis can result in either excessive or inadequate use of chemicals, with increased production costs and environmental and health impacts. We have manually captured 3,651 high-quality, real-life symptom images of multiple apple foliar diseases, with variable illumination, angles, surfaces, and noise. A subset, expert-annotated to create a pilot dataset for apple scab, cedar apple rust, and healthy leaves, was made available to the Kaggle community for the 'Plant Pathology Challenge', part of the Fine-Grained Visual Categorization (FGVC) workshop at CVPR 2020 (Computer Vision and Pattern Recognition). We also trained an off-the-shelf convolutional neural network (CNN) on this data for disease classification and achieved 97% accuracy on a held-out test set. This dataset will contribute to the development and deployment of machine-learning-based automated plant disease classification algorithms and ultimately help realize fast and accurate disease detection. We will continue to add images to the pilot dataset for a larger, more comprehensive expert-annotated dataset for future Kaggle competitions, and to explore more advanced methods for disease classification and quantification.
Submitted 24 April, 2020; originally announced April 2020.
Comments: 11 pages, 5 figures, Kaggle competition website: https://www.kaggle.com/c/plant-pathology-2020-fgvc7, CVPR fine-grained visual categorization website: https://sites.google.com/view/fgvc7/competitions
ACM Class: I.2.1; I.2.10
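
Since the abstract reports only that an off-the-shelf CNN reached 97% held-out accuracy, the following transfer-learning sketch is one plausible baseline; ResNet-50 and the four Kaggle class labels (healthy, multiple_diseases, rust, scab) are assumptions here, not details taken from the paper:

    # Sketch: fine-tune an off-the-shelf CNN for foliar-disease classification.
    # Architecture and class count are assumptions, not the paper's specifics.
    import torch
    import torch.nn as nn
    from torchvision import models

    model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    model.fc = nn.Linear(model.fc.in_features, 4)  # new head for 4 leaf classes

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    def train_step(images, labels):
        """One supervised step; images are (N, 3, 224, 224) tensors."""
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        return loss.item()

Starting from ImageNet weights rather than random initialization is what makes a few thousand annotated leaf images sufficient for accuracy in this range.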

arXiv:1910.06162 [pdf, ps, other] (https://arxiv.org/abs/1910.06162)
Subjects: cs.FL (Formal Languages and Automata Theory); math.CO (Combinatorics)
Title: Generating Posets Beyond N
Authors: Uli Fahrenberg, Christian Johansen, Georg Struth, Ratan Bahadur Thapa
Abstract: We introduce iposets (posets with interfaces), equipped with a novel gluing composition along interfaces and the standard parallel composition. We study their basic algebraic properties as well as the hierarchy of gluing-parallel posets generated from singletons by finitary applications of the two compositions. We show that not only series-parallel posets but also interval orders, which seem more interesting for modelling concurrent and distributed systems, can be generated, though not all posets can. Generating posets is also important for constructing free algebras for concurrent semirings and Kleene algebras that allow compositional reasoning about such systems.
Submitted 14 October, 2019; originally announced October 2019.
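
For readers skimming the listing, the shape of the two compositions can be rendered roughly as follows; this sketch is reconstructed from the abstract's description and approximates, rather than reproduces, the paper's definitions:

    % Schematic sketch, not the paper's precise formalization.
    % An iposet is a poset $P$ with injections
    %   $s \colon [n] \to \min(P)$, $t \colon [m] \to \max(P)$
    % marking source and target interfaces.
    %
    % Gluing $P * Q$ (defined when $P$'s target arity equals $Q$'s source
    % arity $k$) identifies interface points and orders the rest serially:
    \[
      P * Q = (P \sqcup Q)/\,t(i) \sim s'(i), \qquad
      x <_{P*Q} y \iff x <_P y \,\lor\, x <_Q y \,\lor\,
      (x \in P \setminus t[k] \wedge y \in Q \setminus s'[k]).
    \]
    % Parallel composition $P \otimes Q$ is disjoint union, with the two
    % interface injections placed side by side.

Because glued interface points are shared rather than ordered, gluing can create the overlap patterns of interval orders, which purely serial and parallel composition of plain posets cannot.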
class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>