CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 86 results for author: <span class="mathjax">Saeed, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Saeed%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Saeed, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Saeed%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Saeed, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Saeed%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Saeed%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Saeed%2C+A&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00718">arXiv:2411.00718</a> <span> [<a href="https://arxiv.org/pdf/2411.00718">pdf</a>, <a href="https://arxiv.org/format/2411.00718">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PedSleepMAE: Generative Model for Multimodal Pediatric Sleep Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pandey%2C+S+R">Saurav R. Pandey</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H">Harlin Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00718v1-abstract-short" style="display: inline;"> Pediatric sleep is an important but often overlooked area in health informatics. We present PedSleepMAE, a generative model that fully leverages multimodal pediatric sleep signals including multichannel EEGs, respiratory signals, EOGs and EMG. This masked autoencoder-based model performs comparably to supervised learning models in sleep scoring and in the detection of apnea, hypopnea, EEG arousal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00718v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00718v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00718v1-abstract-full" style="display: none;"> Pediatric sleep is an important but often overlooked area in health informatics. We present PedSleepMAE, a generative model that fully leverages multimodal pediatric sleep signals including multichannel EEGs, respiratory signals, EOGs and EMG. This masked autoencoder-based model performs comparably to supervised learning models in sleep scoring and in the detection of apnea, hypopnea, EEG arousal and oxygen desaturation. Its embeddings are also shown to capture subtle differences in sleep signals coming from a rare genetic disorder. Furthermore, PedSleepMAE generates realistic signals that can be used for sleep segment retrieval, outlier detection, and missing channel imputation. This is the first general-purpose generative model trained on multiple types of pediatric sleep signals. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00718v1-abstract-full').style.display = 'none'; document.getElementById('2411.00718v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21256">arXiv:2410.21256</a> <span> [<a href="https://arxiv.org/pdf/2410.21256">pdf</a>, <a href="https://arxiv.org/format/2410.21256">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Multi-modal AI for comprehensive breast cancer prognostication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Witowski%2C+J">Jan Witowski</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+K">Ken Zeng</a>, <a href="/search/cs?searchtype=author&query=Cappadona%2C+J">Joseph Cappadona</a>, <a href="/search/cs?searchtype=author&query=Elayoubi%2C+J">Jailan Elayoubi</a>, <a href="/search/cs?searchtype=author&query=Chiru%2C+E+D">Elena Diana Chiru</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+N">Nancy Chan</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Young-Joon Kang</a>, <a href="/search/cs?searchtype=author&query=Howard%2C+F">Frederick Howard</a>, <a href="/search/cs?searchtype=author&query=Ostrovnaya%2C+I">Irina Ostrovnaya</a>, <a href="/search/cs?searchtype=author&query=Fernandez-Granda%2C+C">Carlos Fernandez-Granda</a>, <a href="/search/cs?searchtype=author&query=Schnabel%2C+F">Freya Schnabel</a>, <a href="/search/cs?searchtype=author&query=Ozerdem%2C+U">Ugur Ozerdem</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kangning Liu</a>, <a href="/search/cs?searchtype=author&query=Steinsnyder%2C+Z">Zoe Steinsnyder</a>, <a href="/search/cs?searchtype=author&query=Thakore%2C+N">Nitya Thakore</a>, <a href="/search/cs?searchtype=author&query=Sadic%2C+M">Mohammad Sadic</a>, <a href="/search/cs?searchtype=author&query=Yeung%2C+F">Frank Yeung</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+E">Elisa Liu</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+T">Theodore Hill</a>, <a href="/search/cs?searchtype=author&query=Swett%2C+B">Benjamin Swett</a>, <a href="/search/cs?searchtype=author&query=Rigau%2C+D">Danielle Rigau</a>, <a href="/search/cs?searchtype=author&query=Clayburn%2C+A">Andrew Clayburn</a>, <a href="/search/cs?searchtype=author&query=Speirs%2C+V">Valerie Speirs</a>, <a href="/search/cs?searchtype=author&query=Vetter%2C+M">Marcus Vetter</a>, <a href="/search/cs?searchtype=author&query=Sojak%2C+L">Lina Sojak</a> , et al. (26 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21256v1-abstract-short" style="display: inline;"> Treatment selection in breast cancer is guided by molecular subtypes and clinical characteristics. Recurrence risk assessment plays a crucial role in personalizing treatment. Current methods, including genomic assays, have limited accuracy and clinical utility, leading to suboptimal decisions for many patients. We developed a test for breast cancer patient stratification based on digital pathology… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21256v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21256v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21256v1-abstract-full" style="display: none;"> Treatment selection in breast cancer is guided by molecular subtypes and clinical characteristics. Recurrence risk assessment plays a crucial role in personalizing treatment. Current methods, including genomic assays, have limited accuracy and clinical utility, leading to suboptimal decisions for many patients. We developed a test for breast cancer patient stratification based on digital pathology and clinical characteristics using novel AI methods. Specifically, we utilized a vision transformer-based pan-cancer foundation model trained with self-supervised learning to extract features from digitized H&E-stained slides. These features were integrated with clinical data to form a multi-modal AI test predicting cancer recurrence and death. The test was developed and evaluated using data from a total of 8,161 breast cancer patients across 15 cohorts originating from seven countries. Of these, 3,502 patients from five cohorts were used exclusively for evaluation, while the remaining patients were used for training. Our test accurately predicted our primary endpoint, disease-free interval, in the five external cohorts (C-index: 0.71 [0.68-0.75], HR: 3.63 [3.02-4.37, p<0.01]). In a direct comparison (N=858), the AI test was more accurate than Oncotype DX, the standard-of-care 21-gene assay, with a C-index of 0.67 [0.61-0.74] versus 0.61 [0.49-0.73], respectively. Additionally, the AI test added independent information to Oncotype DX in a multivariate analysis (HR: 3.11 [1.91-5.09, p<0.01)]). The test demonstrated robust accuracy across all major breast cancer subtypes, including TNBC (C-index: 0.71 [0.62-0.81], HR: 3.81 [2.35-6.17, p=0.02]), where no diagnostic tools are currently recommended by clinical guidelines. These results suggest that our AI test can improve accuracy, extend applicability to a wider range of patients, and enhance access to treatment selection tools. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21256v1-abstract-full').style.display = 'none'; document.getElementById('2410.21256v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19719">arXiv:2410.19719</a> <span> [<a href="https://arxiv.org/pdf/2410.19719">pdf</a>, <a href="https://arxiv.org/format/2410.19719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Arabic Music Classification and Generation using Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Elshaarawy%2C+M">Mohamed Elshaarawy</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Ashrakat Saeed</a>, <a href="/search/cs?searchtype=author&query=Sheta%2C+M">Mariam Sheta</a>, <a href="/search/cs?searchtype=author&query=Said%2C+A">Abdelrahman Said</a>, <a href="/search/cs?searchtype=author&query=Bakr%2C+A">Asem Bakr</a>, <a href="/search/cs?searchtype=author&query=Bahaa%2C+O">Omar Bahaa</a>, <a href="/search/cs?searchtype=author&query=Gomaa%2C+W">Walid Gomaa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19719v1-abstract-short" style="display: inline;"> This paper proposes a machine learning approach for classifying classical and new Egyptian music by composer and generating new similar music. The proposed system utilizes a convolutional neural network (CNN) for classification and a CNN autoencoder for generation. The dataset used in this project consists of new and classical Egyptian music pieces composed by different composers. To classify th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19719v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19719v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19719v1-abstract-full" style="display: none;"> This paper proposes a machine learning approach for classifying classical and new Egyptian music by composer and generating new similar music. The proposed system utilizes a convolutional neural network (CNN) for classification and a CNN autoencoder for generation. The dataset used in this project consists of new and classical Egyptian music pieces composed by different composers. To classify the music by composer, each sample is normalized and transformed into a mel spectrogram. The CNN model is trained on the dataset using the mel spectrograms as input features and the composer labels as output classes. The model achieves 81.4\% accuracy in classifying the music by composer, demonstrating the effectiveness of the proposed approach. To generate new music similar to the original pieces, a CNN autoencoder is trained on a similar dataset. The model is trained to encode the mel spectrograms of the original pieces into a lower-dimensional latent space and then decode them back into the original mel spectrogram. The generated music is produced by sampling from the latent space and decoding the samples back into mel spectrograms, which are then transformed into audio. In conclusion, the proposed system provides a promising approach to classifying and generating classical Egyptian music, which can be applied in various musical applications, such as music recommendation systems, music production, and music education. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19719v1-abstract-full').style.display = 'none'; document.getElementById('2410.19719v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14464">arXiv:2410.14464</a> <span> [<a href="https://arxiv.org/pdf/2410.14464">pdf</a>, <a href="https://arxiv.org/format/2410.14464">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Electrocardiogram-Language Model for Few-Shot Question Answering with Meta Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jialu Tang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+T">Tong Xia</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yuan Lu</a>, <a href="/search/cs?searchtype=author&query=Mascolo%2C+C">Cecilia Mascolo</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14464v1-abstract-short" style="display: inline;"> Electrocardiogram (ECG) interpretation requires specialized expertise, often involving synthesizing insights from ECG signals with complex clinical queries posed in natural language. The scarcity of labeled ECG data coupled with the diverse nature of clinical inquiries presents a significant challenge for developing robust and adaptable ECG diagnostic systems. This work introduces a novel multimod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14464v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14464v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14464v1-abstract-full" style="display: none;"> Electrocardiogram (ECG) interpretation requires specialized expertise, often involving synthesizing insights from ECG signals with complex clinical queries posed in natural language. The scarcity of labeled ECG data coupled with the diverse nature of clinical inquiries presents a significant challenge for developing robust and adaptable ECG diagnostic systems. This work introduces a novel multimodal meta-learning method for few-shot ECG question answering, addressing the challenge of limited labeled data while leveraging the rich knowledge encoded within large language models (LLMs). Our LLM-agnostic approach integrates a pre-trained ECG encoder with a frozen LLM (e.g., LLaMA and Gemma) via a trainable fusion module, enabling the language model to reason about ECG data and generate clinically meaningful answers. Extensive experiments demonstrate superior generalization to unseen diagnostic tasks compared to supervised baselines, achieving notable performance even with limited ECG leads. For instance, in a 5-way 5-shot setting, our method using LLaMA-3.1-8B achieves accuracy of 84.6%, 77.3%, and 69.6% on single verify, choose and query question types, respectively. These results highlight the potential of our method to enhance clinical ECG interpretation by combining signal processing with the nuanced language understanding capabilities of LLMs, particularly in data-constrained scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14464v1-abstract-full').style.display = 'none'; document.getElementById('2410.14464v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12661">arXiv:2410.12661</a> <span> [<a href="https://arxiv.org/pdf/2410.12661">pdf</a>, <a href="https://arxiv.org/format/2410.12661">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Do They Understand What They Are Using? -- Assessing Perception and Usage of Biometrics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mecke%2C+L">Lukas Mecke</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+A">Alia Saad</a>, <a href="/search/cs?searchtype=author&query=Prange%2C+S">Sarah Prange</a>, <a href="/search/cs?searchtype=author&query=Gruenefeld%2C+U">Uwe Gruenefeld</a>, <a href="/search/cs?searchtype=author&query=Schneegass%2C+S">Stefan Schneegass</a>, <a href="/search/cs?searchtype=author&query=Alt%2C+F">Florian Alt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12661v1-abstract-short" style="display: inline;"> In this paper we assess how well users know biometric authentication methods, how they perceive them, and if they have misconceptions about them. We present the results of an online survey that we conducted in two rounds (2019, N=57; and 2023, N=47) to understand the impact of the increasing availability of biometrics on their use and perception. The survey covered participants' general understand… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12661v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12661v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12661v1-abstract-full" style="display: none;"> In this paper we assess how well users know biometric authentication methods, how they perceive them, and if they have misconceptions about them. We present the results of an online survey that we conducted in two rounds (2019, N=57; and 2023, N=47) to understand the impact of the increasing availability of biometrics on their use and perception. The survey covered participants' general understanding of physiological and behavioral biometrics and their perceived usability and security. While most participants were able to name examples and stated that they use biometrics in their daily lives, they still had difficulties explaining the concepts behind them. We shed light on participants' misconceptions, their coping strategies with authentication failures and potential attacks, as well as their perception of the usability and security of biometrics in general. As such, our results can support the design of both further studies to gain deeper insights and future biometric interfaces to foster the informed use of biometrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12661v1-abstract-full').style.display = 'none'; document.getElementById('2410.12661v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05361">arXiv:2410.05361</a> <span> [<a href="https://arxiv.org/pdf/2410.05361">pdf</a>, <a href="https://arxiv.org/format/2410.05361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RespLLM: Unifying Audio and Text with Multimodal LLMs for Generalized Respiratory Health Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+T">Tong Xia</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Mascolo%2C+C">Cecilia Mascolo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05361v1-abstract-short" style="display: inline;"> The high incidence and mortality rates associated with respiratory diseases underscores the importance of early screening. Machine learning models can automate clinical consultations and auscultation, offering vital support in this area. However, the data involved, spanning demographics, medical history, symptoms, and respiratory audio, are heterogeneous and complex. Existing approaches are insuff… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05361v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05361v1-abstract-full" style="display: none;"> The high incidence and mortality rates associated with respiratory diseases underscores the importance of early screening. Machine learning models can automate clinical consultations and auscultation, offering vital support in this area. However, the data involved, spanning demographics, medical history, symptoms, and respiratory audio, are heterogeneous and complex. Existing approaches are insufficient and lack generalizability, as they typically rely on limited training data, basic fusion techniques, and task-specific models. In this paper, we propose RespLLM, a novel multimodal large language model (LLM) framework that unifies text and audio representations for respiratory health prediction. RespLLM leverages the extensive prior knowledge of pretrained LLMs and enables effective audio-text fusion through cross-modal attentions. Instruction tuning is employed to integrate diverse data from multiple sources, ensuring generalizability and versatility of the model. Experiments on five real-world datasets demonstrate that RespLLM outperforms leading baselines by an average of 4.6% on trained tasks, 7.9% on unseen datasets, and facilitates zero-shot predictions for new tasks. Our work lays the foundation for multimodal models that can perceive, listen to, and understand heterogeneous data, paving the way for scalable respiratory health diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05361v1-abstract-full').style.display = 'none'; document.getElementById('2410.05361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02131">arXiv:2410.02131</a> <span> [<a href="https://arxiv.org/pdf/2410.02131">pdf</a>, <a href="https://arxiv.org/format/2410.02131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> C-MELT: Contrastive Enhanced Masked Auto-Encoders for ECG-Language Pre-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pham%2C+M">Manh Pham</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+D">Dong Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02131v2-abstract-short" style="display: inline;"> Accurate interpretation of Electrocardiogram (ECG) signals is pivotal for diagnosing cardiovascular diseases. Integrating ECG signals with their accompanying textual reports holds immense potential to enhance clinical diagnostics through the combination of physiological data and qualitative insights. However, this integration faces significant challenges due to inherent modality disparities and th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02131v2-abstract-full').style.display = 'inline'; document.getElementById('2410.02131v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02131v2-abstract-full" style="display: none;"> Accurate interpretation of Electrocardiogram (ECG) signals is pivotal for diagnosing cardiovascular diseases. Integrating ECG signals with their accompanying textual reports holds immense potential to enhance clinical diagnostics through the combination of physiological data and qualitative insights. However, this integration faces significant challenges due to inherent modality disparities and the scarcity of labeled data for robust cross-modal learning. To address these obstacles, we propose C-MELT, a novel framework that pre-trains ECG and text data using a contrastive masked auto-encoder architecture. C-MELT uniquely combines the strengths of generative with enhanced discriminative capabilities to achieve robust cross-modal representations. This is accomplished through masked modality modeling, specialized loss functions, and an improved negative sampling strategy tailored for cross-modal alignment. Extensive experiments on five public datasets across diverse downstream tasks demonstrate that C-MELT significantly outperforms existing methods, achieving 15% and 2% increases in linear probing and zero-shot performance over state-of-the-art models, respectively. These results highlight the effectiveness of C-MELT, underscoring its potential to advance automated clinical diagnostics through multi-modal representations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02131v2-abstract-full').style.display = 'none'; document.getElementById('2410.02131v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17332">arXiv:2409.17332</a> <span> [<a href="https://arxiv.org/pdf/2409.17332">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Block Expanded DINORET: Adapting Natural Domain Foundation Models for Retinal Imaging Without Catastrophic Forgetting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zoellin%2C+J">Jay Zoellin</a>, <a href="/search/cs?searchtype=author&query=Merk%2C+C">Colin Merk</a>, <a href="/search/cs?searchtype=author&query=Buob%2C+M">Mischa Buob</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+A">Amr Saad</a>, <a href="/search/cs?searchtype=author&query=Giesser%2C+S">Samuel Giesser</a>, <a href="/search/cs?searchtype=author&query=Spitznagel%2C+T">Tahm Spitznagel</a>, <a href="/search/cs?searchtype=author&query=Turgut%2C+F">Ferhat Turgut</a>, <a href="/search/cs?searchtype=author&query=Santos%2C+R">Rui Santos</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yukun Zhou</a>, <a href="/search/cs?searchtype=author&query=Wagner%2C+S">Sigfried Wagner</a>, <a href="/search/cs?searchtype=author&query=Keane%2C+P+A">Pearse A. Keane</a>, <a href="/search/cs?searchtype=author&query=Tham%2C+Y+C">Yih Chung Tham</a>, <a href="/search/cs?searchtype=author&query=DeBuc%2C+D+C">Delia Cabrera DeBuc</a>, <a href="/search/cs?searchtype=author&query=Becker%2C+M+D">Matthias D. Becker</a>, <a href="/search/cs?searchtype=author&query=Somfai%2C+G+M">Gabor M. Somfai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17332v1-abstract-short" style="display: inline;"> Integrating deep learning into medical imaging is poised to greatly advance diagnostic methods but it faces challenges with generalizability. Foundation models, based on self-supervised learning, address these issues and improve data efficiency. Natural domain foundation models show promise for medical imaging, but systematic research evaluating domain adaptation, especially using self-supervised… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17332v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17332v1-abstract-full" style="display: none;"> Integrating deep learning into medical imaging is poised to greatly advance diagnostic methods but it faces challenges with generalizability. Foundation models, based on self-supervised learning, address these issues and improve data efficiency. Natural domain foundation models show promise for medical imaging, but systematic research evaluating domain adaptation, especially using self-supervised learning and parameter-efficient fine-tuning, remains underexplored. Additionally, little research addresses the issue of catastrophic forgetting during fine-tuning of foundation models. We adapted the DINOv2 vision transformer for retinal imaging classification tasks using self-supervised learning and generated two novel foundation models termed DINORET and BE DINORET. Publicly available color fundus photographs were employed for model development and subsequent fine-tuning for diabetic retinopathy staging and glaucoma detection. We introduced block expansion as a novel domain adaptation strategy and assessed the models for catastrophic forgetting. Models were benchmarked to RETFound, a state-of-the-art foundation model in ophthalmology. DINORET and BE DINORET demonstrated competitive performance on retinal imaging tasks, with the block expanded model achieving the highest scores on most datasets. Block expansion successfully mitigated catastrophic forgetting. Our few-shot learning studies indicated that DINORET and BE DINORET outperform RETFound in terms of data-efficiency. This study highlights the potential of adapting natural domain vision models to retinal imaging using self-supervised learning and block expansion. BE DINORET offers robust performance without sacrificing previously acquired capabilities. Our findings suggest that these methods could enable healthcare institutions to develop tailored vision models for their patient populations, enhancing global healthcare inclusivity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17332v1-abstract-full').style.display = 'none'; document.getElementById('2409.17332v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">J.Zoellin, C. Merk and M. Buob contributed equally as shared-first authors. D. Cabrera DeBuc, M. D. Becker and G. M. Somfai contributed equally as senior authors for this work</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.0; I.2.10; J.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08788">arXiv:2409.08788</a> <span> [<a href="https://arxiv.org/pdf/2409.08788">pdf</a>, <a href="https://arxiv.org/format/2409.08788">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Electrocardiogram Report Generation and Question Answering via Retrieval-Augmented Self-Supervised Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jialu Tang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+T">Tong Xia</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yuan Lu</a>, <a href="/search/cs?searchtype=author&query=Mascolo%2C+C">Cecilia Mascolo</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08788v1-abstract-short" style="display: inline;"> Interpreting electrocardiograms (ECGs) and generating comprehensive reports remain challenging tasks in cardiology, often requiring specialized expertise and significant time investment. To address these critical issues, we propose ECG-ReGen, a retrieval-based approach for ECG-to-text report generation and question answering. Our method leverages a self-supervised learning for the ECG encoder, ena… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08788v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08788v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08788v1-abstract-full" style="display: none;"> Interpreting electrocardiograms (ECGs) and generating comprehensive reports remain challenging tasks in cardiology, often requiring specialized expertise and significant time investment. To address these critical issues, we propose ECG-ReGen, a retrieval-based approach for ECG-to-text report generation and question answering. Our method leverages a self-supervised learning for the ECG encoder, enabling efficient similarity searches and report retrieval. By combining pre-training with dynamic retrieval and Large Language Model (LLM)-based refinement, ECG-ReGen effectively analyzes ECG data and answers related queries, with the potential of improving patient care. Experiments conducted on the PTB-XL and MIMIC-IV-ECG datasets demonstrate superior performance in both in-domain and cross-domain scenarios for report generation. Furthermore, our approach exhibits competitive performance on ECG-QA dataset compared to fully supervised methods when utilizing off-the-shelf LLMs for zero-shot question answering. This approach, effectively combining self-supervised encoder and LLMs, offers a scalable and efficient solution for accurate ECG interpretation, holding significant potential to enhance clinical decision-making. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08788v1-abstract-full').style.display = 'none'; document.getElementById('2409.08788v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02189">arXiv:2409.02189</a> <span> [<a href="https://arxiv.org/pdf/2409.02189">pdf</a>, <a href="https://arxiv.org/format/2409.02189">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Collaboratively Learning Federated Models from Noisy Decentralized Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Haoyuan Li</a>, <a href="/search/cs?searchtype=author&query=Funk%2C+M">Mathias Funk</a>, <a href="/search/cs?searchtype=author&query=G%C3%BCrel%2C+N+M">Nezihe Merve G眉rel</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02189v1-abstract-short" style="display: inline;"> Federated learning (FL) has emerged as a prominent method for collaboratively training machine learning models using local data from edge devices, all while keeping data decentralized. However, accounting for the quality of data contributed by local clients remains a critical challenge in FL, as local data are often susceptible to corruption by various forms of noise and perturbations, which compr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02189v1-abstract-full').style.display = 'inline'; document.getElementById('2409.02189v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02189v1-abstract-full" style="display: none;"> Federated learning (FL) has emerged as a prominent method for collaboratively training machine learning models using local data from edge devices, all while keeping data decentralized. However, accounting for the quality of data contributed by local clients remains a critical challenge in FL, as local data are often susceptible to corruption by various forms of noise and perturbations, which compromise the aggregation process and lead to a subpar global model. In this work, we focus on addressing the problem of noisy data in the input space, an under-explored area compared to the label noise. We propose a comprehensive assessment of client input in the gradient space, inspired by the distinct disparity observed between the density of gradient norm distributions of models trained on noisy and clean input data. Based on this observation, we introduce a straightforward yet effective approach to identify clients with low-quality data at the initial stage of FL. Furthermore, we propose a noise-aware FL aggregation method, namely Federated Noise-Sifting (FedNS), which can be used as a plug-in approach in conjunction with widely used FL strategies. Our extensive evaluation on diverse benchmark datasets under different federated settings demonstrates the efficacy of FedNS. Our method effortlessly integrates with existing FL strategies, enhancing the global model's performance by up to 13.68% in IID and 15.85% in non-IID settings when learning from noisy decentralized data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02189v1-abstract-full').style.display = 'none'; document.getElementById('2409.02189v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06133">arXiv:2407.06133</a> <span> [<a href="https://arxiv.org/pdf/2407.06133">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hebrew letters Detection and Cuneiform tablets Classification by using the yolov8 computer vision model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+E+A">Elaf A. Saeed</a>, <a href="/search/cs?searchtype=author&query=Jasim%2C+A+D">Ammar D. Jasim</a>, <a href="/search/cs?searchtype=author&query=Malik%2C+M+A+A">Munther A. Abdul Malik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06133v1-abstract-short" style="display: inline;"> Cuneiform writing, an old art style, allows us to see into the past. Aside from Egyptian hieroglyphs, the cuneiform script is one of the oldest writing systems. Many historians place Hebrew's origins in antiquity. For example, we used the same approach to decipher the cuneiform languages; after learning how to decipher one old language, we would visit an archaeologist to learn how to decipher any… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06133v1-abstract-full').style.display = 'inline'; document.getElementById('2407.06133v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06133v1-abstract-full" style="display: none;"> Cuneiform writing, an old art style, allows us to see into the past. Aside from Egyptian hieroglyphs, the cuneiform script is one of the oldest writing systems. Many historians place Hebrew's origins in antiquity. For example, we used the same approach to decipher the cuneiform languages; after learning how to decipher one old language, we would visit an archaeologist to learn how to decipher any other ancient language. We propose a deep-learning-based sign detector method to speed up this procedure to identify and group cuneiform tablet images according to Hebrew letter content. The Hebrew alphabet is notoriously difficult and costly to gather the training data needed for deep learning, which entails enclosing Hebrew characters in boxes. We solve this problem using pre-existing transliterations and a sign-by-sign representation of the tablet's content in Latin characters. We recommend one of the supervised approaches because these do not include sign localization: We Find the transliteration signs in the tablet photographs by comparing them to their corresponding transliterations. Then, retrain the sign detector using these localized signs instead of utilizing annotations. Afterward, a more effective sign detector enhances the alignment quality. Consequently, this research aims to use the Yolov8 object identification pretraining model to identify Hebrew characters and categorize the cuneiform tablets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06133v1-abstract-full').style.display = 'none'; document.getElementById('2407.06133v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05189">arXiv:2407.05189</a> <span> [<a href="https://arxiv.org/pdf/2407.05189">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Language Learning through Technology: Introducing a New English-Azerbaijani (Arabic Script) Parallel Corpus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khiarak%2C+J+N">Jalil Nourmohammadi Khiarak</a>, <a href="/search/cs?searchtype=author&query=Ahmadi%2C+A">Ammar Ahmadi</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+T+A">Taher Ak-bari Saeed</a>, <a href="/search/cs?searchtype=author&query=Asgari-Chenaghlu%2C+M">Meysam Asgari-Chenaghlu</a>, <a href="/search/cs?searchtype=author&query=Atabay%2C+T">To臒rul Atabay</a>, <a href="/search/cs?searchtype=author&query=Karimi%2C+M+R+B">Mohammad Reza Baghban Karimi</a>, <a href="/search/cs?searchtype=author&query=Ceferli%2C+I">Ismail Ceferli</a>, <a href="/search/cs?searchtype=author&query=Hasanvand%2C+F">Farzad Hasanvand</a>, <a href="/search/cs?searchtype=author&query=Mousavi%2C+S+M">Seyed Mahboub Mousavi</a>, <a href="/search/cs?searchtype=author&query=Noshad%2C+M">Morteza Noshad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05189v1-abstract-short" style="display: inline;"> This paper introduces a pioneering English-Azerbaijani (Arabic Script) parallel corpus, designed to bridge the technological gap in language learning and machine translation (MT) for under-resourced languages. Consisting of 548,000 parallel sentences and approximately 9 million words per language, this dataset is derived from diverse sources such as news articles and holy texts, aiming to enhance… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05189v1-abstract-full').style.display = 'inline'; document.getElementById('2407.05189v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05189v1-abstract-full" style="display: none;"> This paper introduces a pioneering English-Azerbaijani (Arabic Script) parallel corpus, designed to bridge the technological gap in language learning and machine translation (MT) for under-resourced languages. Consisting of 548,000 parallel sentences and approximately 9 million words per language, this dataset is derived from diverse sources such as news articles and holy texts, aiming to enhance natural language processing (NLP) applications and language education technology. This corpus marks a significant step forward in the realm of linguistic resources, particularly for Turkic languages, which have lagged in the neural machine translation (NMT) revolution. By presenting the first comprehensive case study for the English-Azerbaijani (Arabic Script) language pair, this work underscores the transformative potential of NMT in low-resource contexts. The development and utilization of this corpus not only facilitate the advancement of machine translation systems tailored for specific linguistic needs but also promote inclusive language learning through technology. The findings demonstrate the corpus's effectiveness in training deep learning MT systems and underscore its role as an essential asset for researchers and educators aiming to foster bilingual education and multilingual communication. This research covers the way for future explorations into NMT applications for languages lacking substantial digital resources, thereby enhancing global language education frameworks. The Python package of our code is available at https://pypi.org/project/chevir-kartalol/, and we also have a website accessible at https://translate.kartalol.com/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05189v1-abstract-full').style.display = 'none'; document.getElementById('2407.05189v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted and published at NeTTT 2024 Conf</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03439">arXiv:2407.03439</a> <span> [<a href="https://arxiv.org/pdf/2407.03439">pdf</a>, <a href="https://arxiv.org/format/2407.03439">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DACB-Net: Dual Attention Guided Compact Bilinear Convolution Neural Network for Skin Disease Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahmad%2C+B">Belal Ahmad</a>, <a href="/search/cs?searchtype=author&query=Usama%2C+M">Mohd Usama</a>, <a href="/search/cs?searchtype=author&query=Ahmad%2C+T">Tanvir Ahmad</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Adnan Saeed</a>, <a href="/search/cs?searchtype=author&query=Khatoon%2C+S">Shabnam Khatoon</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Min Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03439v1-abstract-short" style="display: inline;"> This paper introduces the three-branch Dual Attention-Guided Compact Bilinear CNN (DACB-Net) by focusing on learning from disease-specific regions to enhance accuracy and alignment. A global branch compensates for lost discriminative features, generating Attention Heat Maps (AHM) for relevant cropped regions. Finally, the last pooling layers of global and local branches are concatenated for fine-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03439v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03439v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03439v1-abstract-full" style="display: none;"> This paper introduces the three-branch Dual Attention-Guided Compact Bilinear CNN (DACB-Net) by focusing on learning from disease-specific regions to enhance accuracy and alignment. A global branch compensates for lost discriminative features, generating Attention Heat Maps (AHM) for relevant cropped regions. Finally, the last pooling layers of global and local branches are concatenated for fine-tuning, which offers a comprehensive solution to the challenges posed by skin disease diagnosis. Although current CNNs employ Stochastic Gradient Descent (SGD) for discriminative feature learning, using distinct pairs of local image patches to compute gradients and incorporating a modulation factor in the loss for focusing on complex data during training. However, this approach can lead to dataset imbalance, weight adjustments, and vulnerability to overfitting. The proposed solution combines two supervision branches and a novel loss function to address these issues, enhancing performance and interpretability. The framework integrates data augmentation, transfer learning, and fine-tuning to tackle data imbalance to improve classification performance, and reduce computational costs. Simulations on the HAM10000 and ISIC2019 datasets demonstrate the effectiveness of this approach, showcasing a 2.59% increase in accuracy compared to the state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03439v1-abstract-full').style.display = 'none'; document.getElementById('2407.03439v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 18 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15652">arXiv:2406.15652</a> <span> [<a href="https://arxiv.org/pdf/2406.15652">pdf</a>, <a href="https://arxiv.org/format/2406.15652">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3656409">10.1145/3656409 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GenSQL: A Probabilistic Programming System for Querying Generative Models of Database Tables </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huot%2C+M">Mathieu Huot</a>, <a href="/search/cs?searchtype=author&query=Ghavami%2C+M">Matin Ghavami</a>, <a href="/search/cs?searchtype=author&query=Lew%2C+A+K">Alexander K. Lew</a>, <a href="/search/cs?searchtype=author&query=Schaechtle%2C+U">Ulrich Schaechtle</a>, <a href="/search/cs?searchtype=author&query=Freer%2C+C+E">Cameron E. Freer</a>, <a href="/search/cs?searchtype=author&query=Shelby%2C+Z">Zane Shelby</a>, <a href="/search/cs?searchtype=author&query=Rinard%2C+M+C">Martin C. Rinard</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+F+A">Feras A. Saad</a>, <a href="/search/cs?searchtype=author&query=Mansinghka%2C+V+K">Vikash K. Mansinghka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15652v1-abstract-short" style="display: inline;"> This article presents GenSQL, a probabilistic programming system for querying probabilistic generative models of database tables. By augmenting SQL with only a few key primitives for querying probabilistic models, GenSQL enables complex Bayesian inference workflows to be concisely implemented. GenSQL's query planner rests on a unified programmatic interface for interacting with probabilistic model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15652v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15652v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15652v1-abstract-full" style="display: none;"> This article presents GenSQL, a probabilistic programming system for querying probabilistic generative models of database tables. By augmenting SQL with only a few key primitives for querying probabilistic models, GenSQL enables complex Bayesian inference workflows to be concisely implemented. GenSQL's query planner rests on a unified programmatic interface for interacting with probabilistic models of tabular data, which makes it possible to use models written in a variety of probabilistic programming languages that are tailored to specific workflows. Probabilistic models may be automatically learned via probabilistic program synthesis, hand-designed, or a combination of both. GenSQL is formalized using a novel type system and denotational semantics, which together enable us to establish proofs that precisely characterize its soundness guarantees. We evaluate our system on two case real-world studies -- an anomaly detection in clinical trials and conditional synthetic data generation for a virtual wet lab -- and show that GenSQL more accurately captures the complexity of the data as compared to common baselines. We also show that the declarative syntax in GenSQL is more concise and less error-prone as compared to several alternatives. Finally, GenSQL delivers a 1.7-6.8x speedup compared to its closest competitor on a representative benchmark set and runs in comparable time to hand-written code, in part due to its reusable optimizations and code specialization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15652v1-abstract-full').style.display = 'none'; document.getElementById('2406.15652v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">54 pages, 30 figures, 1 table, published at PLDI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12658">arXiv:2406.12658</a> <span> [<a href="https://arxiv.org/pdf/2406.12658">pdf</a>, <a href="https://arxiv.org/format/2406.12658">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Federated Learning with a Single Shared Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Soni%2C+S">Sunny Soni</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Asano%2C+Y+M">Yuki M. Asano</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12658v1-abstract-short" style="display: inline;"> Federated Learning (FL) enables multiple machines to collaboratively train a machine learning model without sharing of private training data. Yet, especially for heterogeneous models, a key bottleneck remains the transfer of knowledge gained from each client model with the server. One popular method, FedDF, uses distillation to tackle this task with the use of a common, shared dataset on which pre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12658v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12658v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12658v1-abstract-full" style="display: none;"> Federated Learning (FL) enables multiple machines to collaboratively train a machine learning model without sharing of private training data. Yet, especially for heterogeneous models, a key bottleneck remains the transfer of knowledge gained from each client model with the server. One popular method, FedDF, uses distillation to tackle this task with the use of a common, shared dataset on which predictions are exchanged. However, in many contexts such a dataset might be difficult to acquire due to privacy and the clients might not allow for storage of a large shared dataset. To this end, in this paper, we introduce a new method that improves this knowledge distillation method to only rely on a single shared image between clients and server. In particular, we propose a novel adaptive dataset pruning algorithm that selects the most informative crops generated from only a single image. With this, we show that federated learning with distillation under a limited shared dataset budget works better by using a single image compared to multiple individual ones. Finally, we extend our approach to allow for training heterogeneous client architectures by incorporating a non-uniform distillation schedule and client-model mirroring on the server side. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12658v1-abstract-full').style.display = 'none'; document.getElementById('2406.12658v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 Pages, 3 Figures, Appendix 4 Pages, CVPRW 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2024, pp. 7782-7790 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00696">arXiv:2406.00696</a> <span> [<a href="https://arxiv.org/pdf/2406.00696">pdf</a>, <a href="https://arxiv.org/ps/2406.00696">ps</a>, <a href="https://arxiv.org/format/2406.00696">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Bilinear-Convolutional Neural Network Using a Matrix Similarity-based Joint Loss Function for Skin Disease Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahmad%2C+B">Belal Ahmad</a>, <a href="/search/cs?searchtype=author&query=Usama%2C+M">Mohd Usama</a>, <a href="/search/cs?searchtype=author&query=Ahmad%2C+T">Tanvir Ahmad</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Adnan Saeed</a>, <a href="/search/cs?searchtype=author&query=Khatoon%2C+S">Shabnam Khatoon</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+L">Long Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00696v1-abstract-short" style="display: inline;"> In this study, we proposed a model for skin disease classification using a Bilinear Convolutional Neural Network (BCNN) with a Constrained Triplet Network (CTN). BCNN can capture rich spatial interactions between features in image data. This computes the outer product of feature vectors from two different CNNs by a bilinear pooling. The resulting features encode second-order statistics, enabling t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00696v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00696v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00696v1-abstract-full" style="display: none;"> In this study, we proposed a model for skin disease classification using a Bilinear Convolutional Neural Network (BCNN) with a Constrained Triplet Network (CTN). BCNN can capture rich spatial interactions between features in image data. This computes the outer product of feature vectors from two different CNNs by a bilinear pooling. The resulting features encode second-order statistics, enabling the network to capture more complex relationships between different channels and spatial locations. The CTN employs the Triplet Loss Function (TLF) by using a new loss layer that is added at the end of the architecture called the Constrained Triplet Loss (CTL) layer. This is done to obtain two significant learning objectives: inter-class categorization and intra-class concentration with their deep features as often as possible, which can be effective for skin disease classification. The proposed model is trained to extract the intra-class features from a deep network and accordingly increases the distance between these features, improving the model's performance. The model achieved a mean accuracy of 93.72%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00696v1-abstract-full').style.display = 'none'; document.getElementById('2406.00696v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 11 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.10561">arXiv:2403.10561</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A collection of the accepted papers for the Human-Centric Representation Learning workshop at AAAI 2024 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Spathis%2C+D">Dimitris Spathis</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Etemad%2C+A">Ali Etemad</a>, <a href="/search/cs?searchtype=author&query=Tonekaboni%2C+S">Sana Tonekaboni</a>, <a href="/search/cs?searchtype=author&query=Laskaridis%2C+S">Stefanos Laskaridis</a>, <a href="/search/cs?searchtype=author&query=Deldari%2C+S">Shohreh Deldari</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+C+I">Chi Ian Tang</a>, <a href="/search/cs?searchtype=author&query=Schwab%2C+P">Patrick Schwab</a>, <a href="/search/cs?searchtype=author&query=Tailor%2C+S">Shyam Tailor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.10561v1-abstract-short" style="display: inline;"> This non-archival index is not complete, as some accepted papers chose to opt-out of inclusion. The list of all accepted papers is available on the workshop website. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.10561v1-abstract-full" style="display: none;"> This non-archival index is not complete, as some accepted papers chose to opt-out of inclusion. The list of all accepted papers is available on the workshop website. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10561v1-abstract-full').style.display = 'none'; document.getElementById('2403.10561v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.16486">arXiv:2402.16486</a> <span> [<a href="https://arxiv.org/pdf/2402.16486">pdf</a>, <a href="https://arxiv.org/format/2402.16486">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Known and Novel Aircraft Recognition -- A Shift from Classification to Similarity Learning for Combat Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Ahmad Saeed</a>, <a href="/search/cs?searchtype=author&query=Atif%2C+H+B">Haasha Bin Atif</a>, <a href="/search/cs?searchtype=author&query=Habib%2C+U">Usman Habib</a>, <a href="/search/cs?searchtype=author&query=Bilal%2C+M">Mohsin Bilal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.16486v1-abstract-short" style="display: inline;"> Precise aircraft recognition in low-resolution remote sensing imagery is a challenging yet crucial task in aviation, especially combat identification. This research addresses this problem with a novel, scalable, and AI-driven solution. The primary hurdle in combat identification in remote sensing imagery is the accurate recognition of Novel/Unknown types of aircraft in addition to Known types. Tra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.16486v1-abstract-full').style.display = 'inline'; document.getElementById('2402.16486v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.16486v1-abstract-full" style="display: none;"> Precise aircraft recognition in low-resolution remote sensing imagery is a challenging yet crucial task in aviation, especially combat identification. This research addresses this problem with a novel, scalable, and AI-driven solution. The primary hurdle in combat identification in remote sensing imagery is the accurate recognition of Novel/Unknown types of aircraft in addition to Known types. Traditional methods, human expert-driven combat identification and image classification, fall short in identifying Novel classes. Our methodology employs similarity learning to discern features of a broad spectrum of military and civilian aircraft. It discerns both Known and Novel aircraft types, leveraging metric learning for the identification and supervised few-shot learning for aircraft type classification. To counter the challenge of limited low-resolution remote sensing data, we propose an end-to-end framework that adapts to the diverse and versatile process of military aircraft recognition by training a generalized embedder in fully supervised manner. Comparative analysis with earlier aircraft image classification methods shows that our approach is effective for aircraft image classification (F1-score Aircraft Type of 0.861) and pioneering for quantifying the identification of Novel types (F1-score Bipartitioning of 0.936). The proposed methodology effectively addresses inherent challenges in remote sensing data, thereby setting new standards in dataset quality. The research opens new avenues for domain experts and demonstrates unique capabilities in distinguishing various aircraft types, contributing to a more robust, domain-adapted potential for real-time aircraft recognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.16486v1-abstract-full').style.display = 'none'; document.getElementById('2402.16486v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.14211">arXiv:2401.14211</a> <span> [<a href="https://arxiv.org/pdf/2401.14211">pdf</a>, <a href="https://arxiv.org/format/2401.14211">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Communication-Efficient Federated Learning through Adaptive Weight Clustering and Server-Side Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tsouvalas%2C+V">Vasileios Tsouvalas</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Ozcelebi%2C+T">Tanir Ozcelebi</a>, <a href="/search/cs?searchtype=author&query=Meratnia%2C+N">Nirvana Meratnia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.14211v3-abstract-short" style="display: inline;"> Federated Learning (FL) is a promising technique for the collaborative training of deep neural networks across multiple devices while preserving data privacy. Despite its potential benefits, FL is hindered by excessive communication costs due to repeated server-client communication during training. To address this challenge, model compression techniques, such as sparsification and weight clusterin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14211v3-abstract-full').style.display = 'inline'; document.getElementById('2401.14211v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.14211v3-abstract-full" style="display: none;"> Federated Learning (FL) is a promising technique for the collaborative training of deep neural networks across multiple devices while preserving data privacy. Despite its potential benefits, FL is hindered by excessive communication costs due to repeated server-client communication during training. To address this challenge, model compression techniques, such as sparsification and weight clustering are applied, which often require modifying the underlying model aggregation schemes or involve cumbersome hyperparameter tuning, with the latter not only adjusts the model's compression rate but also limits model's potential for continuous improvement over growing data. In this paper, we propose FedCompress, a novel approach that combines dynamic weight clustering and server-side knowledge distillation to reduce communication costs while learning highly generalizable models. Through a comprehensive evaluation on diverse public datasets, we demonstrate the efficacy of our approach compared to baselines in terms of communication costs and inference speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14211v3-abstract-full').style.display = 'none'; document.getElementById('2401.14211v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 2 figures, Accepted on ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.14107">arXiv:2401.14107</a> <span> [<a href="https://arxiv.org/pdf/2401.14107">pdf</a>, <a href="https://arxiv.org/format/2401.14107">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Learning under Label Noise through Few-Shot Human-in-the-Loop Refinement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Spathis%2C+D">Dimitris Spathis</a>, <a href="/search/cs?searchtype=author&query=Oh%2C+J">Jungwoo Oh</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+E">Edward Choi</a>, <a href="/search/cs?searchtype=author&query=Etemad%2C+A">Ali Etemad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.14107v1-abstract-short" style="display: inline;"> Wearable technologies enable continuous monitoring of various health metrics, such as physical activity, heart rate, sleep, and stress levels. A key challenge with wearable data is obtaining quality labels. Unlike modalities like video where the videos themselves can be effectively used to label objects or events, wearable data do not contain obvious cues about the physical manifestation of the us… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14107v1-abstract-full').style.display = 'inline'; document.getElementById('2401.14107v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.14107v1-abstract-full" style="display: none;"> Wearable technologies enable continuous monitoring of various health metrics, such as physical activity, heart rate, sleep, and stress levels. A key challenge with wearable data is obtaining quality labels. Unlike modalities like video where the videos themselves can be effectively used to label objects or events, wearable data do not contain obvious cues about the physical manifestation of the users and usually require rich metadata. As a result, label noise can become an increasingly thorny issue when labeling such data. In this paper, we propose a novel solution to address noisy label learning, entitled Few-Shot Human-in-the-Loop Refinement (FHLR). Our method initially learns a seed model using weak labels. Next, it fine-tunes the seed model using a handful of expert corrections. Finally, it achieves better generalizability and robustness by merging the seed and fine-tuned models via weighted parameter averaging. We evaluate our approach on four challenging tasks and datasets, and compare it against eight competitive baselines designed to deal with noisy labels. We show that FHLR achieves significantly better performance when learning from noisy labels and achieves state-of-the-art by a large margin, with up to 19% accuracy improvement under symmetric and asymmetric noise. Notably, we find that FHLR is particularly robust to increased label noise, unlike prior works that suffer from severe performance degradation. Our work not only achieves better generalization in high-stakes health sensing benchmarks but also sheds light on how noise affects commonly-used models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14107v1-abstract-full').style.display = 'none'; document.getElementById('2401.14107v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09077">arXiv:2401.09077</a> <span> [<a href="https://arxiv.org/pdf/2401.09077">pdf</a>, <a href="https://arxiv.org/format/2401.09077">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3610978.3640635">10.1145/3610978.3640635 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Hands-On Robotics: Enabling Communication Through Direct Gesture Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pascher%2C+M">Max Pascher</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+A">Alia Saad</a>, <a href="/search/cs?searchtype=author&query=Liebers%2C+J">Jonathan Liebers</a>, <a href="/search/cs?searchtype=author&query=Heger%2C+R">Roman Heger</a>, <a href="/search/cs?searchtype=author&query=Gerken%2C+J">Jens Gerken</a>, <a href="/search/cs?searchtype=author&query=Schneegass%2C+S">Stefan Schneegass</a>, <a href="/search/cs?searchtype=author&query=Gruene%2C+U">Uwe Gruene</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09077v1-abstract-short" style="display: inline;"> Effective Human-Robot Interaction (HRI) is fundamental to seamlessly integrating robotic systems into our daily lives. However, current communication modes require additional technological interfaces, which can be cumbersome and indirect. This paper presents a novel approach, using direct motion-based communication by moving a robot's end effector. Our strategy enables users to communicate with a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09077v1-abstract-full').style.display = 'inline'; document.getElementById('2401.09077v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09077v1-abstract-full" style="display: none;"> Effective Human-Robot Interaction (HRI) is fundamental to seamlessly integrating robotic systems into our daily lives. However, current communication modes require additional technological interfaces, which can be cumbersome and indirect. This paper presents a novel approach, using direct motion-based communication by moving a robot's end effector. Our strategy enables users to communicate with a robot by using four distinct gestures -- two handshakes ('formal' and 'informal') and two letters ('W' and 'S'). As a proof-of-concept, we conducted a user study with 16 participants, capturing subjective experience ratings and objective data for training machine learning classifiers. Our findings show that the four different gestures performed by moving the robot's end effector can be distinguished with close to 100% accuracy. Our research offers implications for the design of future HRI interfaces, suggesting that motion-based interaction can empower human operators to communicate directly with robots, removing the necessity for additional hardware. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09077v1-abstract-full').style.display = 'none'; document.getElementById('2401.09077v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Companion of the 2024 ACM/IEEE International Conference on Human-Robot Interaction</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.07981">arXiv:2312.07981</a> <span> [<a href="https://arxiv.org/pdf/2312.07981">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.ymssp.2024.111481">10.1016/j.ymssp.2024.111481 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Time Series Diffusion Method: A Denoising Diffusion Probabilistic Model for Vibration Signal Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yi%2C+H">Haiming Yi</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+L">Lei Hou</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yuhong Jin</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+N+A">Nasser A. Saeed</a>, <a href="/search/cs?searchtype=author&query=Kandil%2C+A">Ali Kandil</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+H">Hao Duan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.07981v2-abstract-short" style="display: inline;"> Diffusion models have demonstrated powerful data generation capabilities in various research fields such as image generation. However, in the field of vibration signal generation, the criteria for evaluating the quality of the generated signal are different from that of image generation and there is a fundamental difference between them. At present, there is no research on the ability of diffusion… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07981v2-abstract-full').style.display = 'inline'; document.getElementById('2312.07981v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.07981v2-abstract-full" style="display: none;"> Diffusion models have demonstrated powerful data generation capabilities in various research fields such as image generation. However, in the field of vibration signal generation, the criteria for evaluating the quality of the generated signal are different from that of image generation and there is a fundamental difference between them. At present, there is no research on the ability of diffusion model to generate vibration signal. In this paper, a Time Series Diffusion Method (TSDM) is proposed for vibration signal generation, leveraging the foundational principles of diffusion models. The TSDM uses an improved U-net architecture with attention block, ResBlock and TimeEmbedding to effectively segment and extract features from one-dimensional time series data. It operates based on forward diffusion and reverse denoising processes for time-series generation. Experimental validation is conducted using single-frequency, multi-frequency datasets, and bearing fault datasets. The results show that TSDM can accurately generate the single-frequency and multi-frequency features in the time series and retain the basic frequency features for the diffusion generation results of the bearing fault series. It is also found that the original DDPM could not generate high quality vibration signals, but the improved U-net in TSDM, which applied the combination of attention block and ResBlock, could effectively improve the quality of vibration signal generation. Finally, TSDM is applied to the small sample fault diagnosis of three public bearing fault datasets, and the results show that the accuracy of small sample fault diagnosis of the three datasets is improved by 32.380%, 18.355% and 9.298% at most, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07981v2-abstract-full').style.display = 'none'; document.getElementById('2312.07981v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Mechanical Systems and Signal Processing, 2024, 216: 111481 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.17299">arXiv:2311.17299</a> <span> [<a href="https://arxiv.org/pdf/2311.17299">pdf</a>, <a href="https://arxiv.org/format/2311.17299">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Federated Fine-Tuning of Foundation Models via Probabilistic Masking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tsouvalas%2C+V">Vasileios Tsouvalas</a>, <a href="/search/cs?searchtype=author&query=Asano%2C+Y">Yuki Asano</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.17299v1-abstract-short" style="display: inline;"> Foundation Models (FMs) have revolutionized machine learning with their adaptability and high performance across tasks; yet, their integration into Federated Learning (FL) is challenging due to substantial communication overhead from their extensive parameterization. Current communication-efficient FL strategies, such as gradient compression, reduce bitrates to around $1$ bit-per-parameter (bpp).… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.17299v1-abstract-full').style.display = 'inline'; document.getElementById('2311.17299v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.17299v1-abstract-full" style="display: none;"> Foundation Models (FMs) have revolutionized machine learning with their adaptability and high performance across tasks; yet, their integration into Federated Learning (FL) is challenging due to substantial communication overhead from their extensive parameterization. Current communication-efficient FL strategies, such as gradient compression, reduce bitrates to around $1$ bit-per-parameter (bpp). However, these approaches fail to harness the characteristics of FMs, with their large number of parameters still posing a challenge to communication efficiency, even at these bitrate regimes. In this work, we present DeltaMask, a novel method that efficiently fine-tunes FMs in FL at an ultra-low bitrate, well below 1 bpp. DeltaMask employs stochastic masking to detect highly effective subnetworks within FMs and leverage stochasticity and sparsity in client masks to compress updates into a compact grayscale image using probabilistic filters, deviating from traditional weight training approaches. Our comprehensive evaluations across various datasets and architectures demonstrate DeltaMask efficiently achieves bitrates as low as 0.09 bpp, enhancing communication efficiency while maintaining FMs performance, as measured on 8 datasets and 5 pre-trained models of various network architectures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.17299v1-abstract-full').style.display = 'none'; document.getElementById('2311.17299v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.11714">arXiv:2311.11714</a> <span> [<a href="https://arxiv.org/pdf/2311.11714">pdf</a>, <a href="https://arxiv.org/format/2311.11714">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> On the Importance of Large Objects in CNN Based Object Detection Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saad%2C+A+B">Ahmed Ben Saad</a>, <a href="/search/cs?searchtype=author&query=Facciolo%2C+G">Gabriele Facciolo</a>, <a href="/search/cs?searchtype=author&query=Davy%2C+A">Axel Davy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.11714v1-abstract-short" style="display: inline;"> Object detection models, a prominent class of machine learning algorithms, aim to identify and precisely locate objects in images or videos. However, this task might yield uneven performances sometimes caused by the objects sizes and the quality of the images and labels used for training. In this paper, we highlight the importance of large objects in learning features that are critical for all siz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11714v1-abstract-full').style.display = 'inline'; document.getElementById('2311.11714v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.11714v1-abstract-full" style="display: none;"> Object detection models, a prominent class of machine learning algorithms, aim to identify and precisely locate objects in images or videos. However, this task might yield uneven performances sometimes caused by the objects sizes and the quality of the images and labels used for training. In this paper, we highlight the importance of large objects in learning features that are critical for all sizes. Given these findings, we propose to introduce a weighting term into the training loss. This term is a function of the object area size. We show that giving more weight to large objects leads to improved detection scores across all object sizes and so an overall improvement in Object Detectors performances (+2 p.p. of mAP on small objects, +2 p.p. on medium and +4 p.p. on large on COCO val 2017 with InternImage-T). Additional experiments and ablation studies with different models and on a different dataset further confirm the robustness of our findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11714v1-abstract-full').style.display = 'none'; document.getElementById('2311.11714v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision, Jan 2024, WAIKOLOA, HAWAII, United States </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.00525">arXiv:2310.00525</a> <span> [<a href="https://arxiv.org/pdf/2310.00525">pdf</a>, <a href="https://arxiv.org/format/2310.00525">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement learning adaptive fuzzy controller for lighting systems: application to aircraft cabin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vashishtha%2C+K">Kritika Vashishtha</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+A">Anas Saad</a>, <a href="/search/cs?searchtype=author&query=Faieghi%2C+R">Reza Faieghi</a>, <a href="/search/cs?searchtype=author&query=Xi%2C+F">Fengfeng Xi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.00525v1-abstract-short" style="display: inline;"> The lighting requirements are subjective and one light setting cannot work for all. However, there is little work on developing smart lighting algorithms that can adapt to user preferences. To address this gap, this paper uses fuzzy logic and reinforcement learning to develop an adaptive lighting algorithm. In particular, we develop a baseline fuzzy inference system (FIS) using the domain knowledg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00525v1-abstract-full').style.display = 'inline'; document.getElementById('2310.00525v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.00525v1-abstract-full" style="display: none;"> The lighting requirements are subjective and one light setting cannot work for all. However, there is little work on developing smart lighting algorithms that can adapt to user preferences. To address this gap, this paper uses fuzzy logic and reinforcement learning to develop an adaptive lighting algorithm. In particular, we develop a baseline fuzzy inference system (FIS) using the domain knowledge. We use the existing literature to create a FIS that generates lighting setting recommendations based on environmental conditions i.e. daily glare index, and user information including age, activity, and chronotype. Through a feedback mechanism, the user interacts with the algorithm, correcting the algorithm output to their preferences. We interpret these corrections as rewards to a Q-learning agent, which tunes the FIS parameters online to match the user preferences. We implement the algorithm in an aircraft cabin mockup and conduct an extensive user study to evaluate the effectiveness of the algorithm and understand its learning behavior. Our implementation results demonstrate that the developed algorithm possesses the capability to learn user preferences while successfully adapting to a wide range of environmental conditions and user characteristics. and can deal with a diverse spectrum of environmental conditions and user characteristics. This underscores its viability as a potent solution for intelligent light management, featuring advanced learning capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00525v1-abstract-full').style.display = 'none'; document.getElementById('2310.00525v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.09607">arXiv:2307.09607</a> <span> [<a href="https://arxiv.org/pdf/2307.09607">pdf</a>, <a href="https://arxiv.org/format/2307.09607">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sequential Monte Carlo Learning for Time Series Structure Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saad%2C+F+A">Feras A. Saad</a>, <a href="/search/cs?searchtype=author&query=Patton%2C+B+J">Brian J. Patton</a>, <a href="/search/cs?searchtype=author&query=Hoffman%2C+M+D">Matthew D. Hoffman</a>, <a href="/search/cs?searchtype=author&query=Saurous%2C+R+A">Rif A. Saurous</a>, <a href="/search/cs?searchtype=author&query=Mansinghka%2C+V+K">Vikash K. Mansinghka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.09607v1-abstract-short" style="display: inline;"> This paper presents a new approach to automatically discovering accurate models of complex time series data. Working within a Bayesian nonparametric prior over a symbolic space of Gaussian process time series models, we present a novel structure learning algorithm that integrates sequential Monte Carlo (SMC) and involutive MCMC for highly effective posterior inference. Our method can be used both… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.09607v1-abstract-full').style.display = 'inline'; document.getElementById('2307.09607v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.09607v1-abstract-full" style="display: none;"> This paper presents a new approach to automatically discovering accurate models of complex time series data. Working within a Bayesian nonparametric prior over a symbolic space of Gaussian process time series models, we present a novel structure learning algorithm that integrates sequential Monte Carlo (SMC) and involutive MCMC for highly effective posterior inference. Our method can be used both in "online" settings, where new data is incorporated sequentially in time, and in "offline" settings, by using nested subsets of historical data to anneal the posterior. Empirical measurements on real-world time series show that our method can deliver 10x--100x runtime speedups over previous MCMC and greedy-search structure learning algorithms targeting the same model family. We use our method to perform the first large-scale evaluation of Gaussian process time series structure learning on a prominent benchmark of 1,428 econometric datasets. The results show that our method discovers sensible models that deliver more accurate point forecasts and interval forecasts over multiple horizons as compared to widely used statistical and neural baselines that struggle on this challenging data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.09607v1-abstract-full').style.display = 'none'; document.getElementById('2307.09607v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 8 figures, 2 tables. Appearing in ICML 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 40th International Conference on Machine Learning, PMLR 202:29473-29489, 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.03058">arXiv:2305.03058</a> <span> [<a href="https://arxiv.org/pdf/2305.03058">pdf</a>, <a href="https://arxiv.org/format/2305.03058">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Plug-and-Play Multilingual Few-shot Spoken Words Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Tsouvalas%2C+V">Vasileios Tsouvalas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.03058v1-abstract-short" style="display: inline;"> As technology advances and digital devices become prevalent, seamless human-machine communication is increasingly gaining significance. The growing adoption of mobile, wearable, and other Internet of Things (IoT) devices has changed how we interact with these smart devices, making accurate spoken words recognition a crucial component for effective interaction. However, building robust spoken words… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.03058v1-abstract-full').style.display = 'inline'; document.getElementById('2305.03058v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.03058v1-abstract-full" style="display: none;"> As technology advances and digital devices become prevalent, seamless human-machine communication is increasingly gaining significance. The growing adoption of mobile, wearable, and other Internet of Things (IoT) devices has changed how we interact with these smart devices, making accurate spoken words recognition a crucial component for effective interaction. However, building robust spoken words detection system that can handle novel keywords remains challenging, especially for low-resource languages with limited training data. Here, we propose PLiX, a multilingual and plug-and-play keyword spotting system that leverages few-shot learning to harness massive real-world data and enable the recognition of unseen spoken words at test-time. Our few-shot deep models are learned with millions of one-second audio clips across 20 languages, achieving state-of-the-art performance while being highly efficient. Extensive evaluations show that PLiX can generalize to novel spoken words given as few as just one support example and performs well on unseen languages out of the box. We release models and inference code to serve as a foundation for future research and voice-enabled user interface development for emerging devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.03058v1-abstract-full').style.display = 'none'; document.getElementById('2305.03058v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code: https://github.com/FewshotML/plix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.10988">arXiv:2303.10988</a> <span> [<a href="https://arxiv.org/pdf/2303.10988">pdf</a>, <a href="https://arxiv.org/ps/2303.10988">ps</a>, <a href="https://arxiv.org/format/2303.10988">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> This Was (Not) Intended: How Intent Communication and Biometrics Can Enhance Social Interactions With Robots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kassem%2C+K">Khaled Kassem</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+A">Alia Saad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.10988v1-abstract-short" style="display: inline;"> Socially Assistive Robots (SARs) are robots that are designed to replicate the role of a caregiver, coach, or teacher, providing emotional, cognitive, and social cues to support a specific group. SARs are becoming increasingly prevalent, especially in elderly care. Effective communication, both explicit and implicit, is a critical aspect of human-robot interaction involving SARs. Intent communicat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.10988v1-abstract-full').style.display = 'inline'; document.getElementById('2303.10988v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.10988v1-abstract-full" style="display: none;"> Socially Assistive Robots (SARs) are robots that are designed to replicate the role of a caregiver, coach, or teacher, providing emotional, cognitive, and social cues to support a specific group. SARs are becoming increasingly prevalent, especially in elderly care. Effective communication, both explicit and implicit, is a critical aspect of human-robot interaction involving SARs. Intent communication is necessary for SARs to engage in effective communication with humans. Biometrics can provide crucial information about a person's identity or emotions. By linking these biometric signals to the communication of intent, SARs can gain a profound understanding of their users and tailor their interactions accordingly. The development of reliable and robust biometric sensing and analysis systems is critical to the success of SARs. In this work, we focus on four different aspects to evaluate the communication of intent involving SARs, existing works, and our outlook on future works and applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.10988v1-abstract-full').style.display = 'none'; document.getElementById('2303.10988v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> SARTMI/2023/8 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.04205">arXiv:2301.04205</a> <span> [<a href="https://arxiv.org/pdf/2301.04205">pdf</a>, <a href="https://arxiv.org/format/2301.04205">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> A Performance Verification Methodology for Resource Allocation Heuristics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Goel%2C+S">Saksham Goel</a>, <a href="/search/cs?searchtype=author&query=Mikek%2C+B">Benjamin Mikek</a>, <a href="/search/cs?searchtype=author&query=Aly%2C+J">Jehad Aly</a>, <a href="/search/cs?searchtype=author&query=Arun%2C+V">Venkat Arun</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Ahmed Saeed</a>, <a href="/search/cs?searchtype=author&query=Akella%2C+A">Aditya Akella</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.04205v2-abstract-short" style="display: inline;"> Performance verification is a nascent but promising tool for understanding the performance and limitations of heuristics under realistic assumptions. Bespoke performance verification tools have already demonstrated their value in settings like congestion control and packet scheduling. In this paper, we aim to emphasize the broad applicability and utility of performance verification. To that end, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.04205v2-abstract-full').style.display = 'inline'; document.getElementById('2301.04205v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.04205v2-abstract-full" style="display: none;"> Performance verification is a nascent but promising tool for understanding the performance and limitations of heuristics under realistic assumptions. Bespoke performance verification tools have already demonstrated their value in settings like congestion control and packet scheduling. In this paper, we aim to emphasize the broad applicability and utility of performance verification. To that end, we highlight the design principles of performance verification. Then, we leverage that understanding to develop a set of easy-to-follow guidelines that are applicable to a wide range of resource allocation heuristics. In particular, we introduce Virelay, a framework that enables heuristic designers to express the behavior of their algorithms and their assumptions about the system in an environment that resembles a discrete-event simulator. We demonstrate the utility and ease-of-use of Virelay by applying it to six diverse case studies. We produce bounds on the performance of classical algorithms, work stealing and SRPT scheduling, under practical assumptions. We demonstrate Virelay's expressiveness by capturing existing models for congestion control and packet scheduling, and we verify the observation that TCP unfairness can cause some ML training workloads to spontaneously converge to a state of high network utilization. Finally, we use Virelay to identify two bugs in the Linux CFS load balancer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.04205v2-abstract-full').style.display = 'none'; document.getElementById('2301.04205v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.10177">arXiv:2211.10177</a> <span> [<a href="https://arxiv.org/pdf/2211.10177">pdf</a>, <a href="https://arxiv.org/format/2211.10177">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Improving Pixel-Level Contrastive Learning by Leveraging Exogenous Depth Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saad%2C+A+B">Ahmed Ben Saad</a>, <a href="/search/cs?searchtype=author&query=Prokopetc%2C+K">Kristina Prokopetc</a>, <a href="/search/cs?searchtype=author&query=Kherroubi%2C+J">Josselin Kherroubi</a>, <a href="/search/cs?searchtype=author&query=Davy%2C+A">Axel Davy</a>, <a href="/search/cs?searchtype=author&query=Courtois%2C+A">Adrien Courtois</a>, <a href="/search/cs?searchtype=author&query=Facciolo%2C+G">Gabriele Facciolo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.10177v1-abstract-short" style="display: inline;"> Self-supervised representation learning based on Contrastive Learning (CL) has been the subject of much attention in recent years. This is due to the excellent results obtained on a variety of subsequent tasks (in particular classification), without requiring a large amount of labeled samples. However, most reference CL algorithms (such as SimCLR and MoCo, but also BYOL and Barlow Twins) are not a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.10177v1-abstract-full').style.display = 'inline'; document.getElementById('2211.10177v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.10177v1-abstract-full" style="display: none;"> Self-supervised representation learning based on Contrastive Learning (CL) has been the subject of much attention in recent years. This is due to the excellent results obtained on a variety of subsequent tasks (in particular classification), without requiring a large amount of labeled samples. However, most reference CL algorithms (such as SimCLR and MoCo, but also BYOL and Barlow Twins) are not adapted to pixel-level downstream tasks. One existing solution known as PixPro proposes a pixel-level approach that is based on filtering of pairs of positive/negative image crops of the same image using the distance between the crops in the whole image. We argue that this idea can be further enhanced by incorporating semantic information provided by exogenous data as an additional selection filter, which can be used (at training time) to improve the selection of the pixel-level positive/negative samples. In this paper we will focus on the depth information, which can be obtained by using a depth estimation network or measured from available data (stereovision, parallax motion, LiDAR, etc.). Scene depth can provide meaningful cues to distinguish pixels belonging to different objects based on their depth. We show that using this exogenous information in the contrastive loss leads to improved results and that the learned representations better follow the shapes of objects. In addition, we introduce a multi-scale loss that alleviates the issue of finding the training parameters adapted to different object sizes. We demonstrate the effectiveness of our ideas on the Breakout Segmentation on Borehole Images where we achieve an improvement of 1.9\% over PixPro and nearly 5\% over the supervised baseline. We further validate our technique on the indoor scene segmentation tasks with ScanNet and outdoor scenes with CityScapes ( 1.6\% and 1.1\% improvement over PixPro respectively). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.10177v1-abstract-full').style.display = 'none'; document.getElementById('2211.10177v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for WACV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00119">arXiv:2211.00119</a> <span> [<a href="https://arxiv.org/pdf/2211.00119">pdf</a>, <a href="https://arxiv.org/format/2211.00119">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICASSP49357.2023.10096465">10.1109/ICASSP49357.2023.10096465 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Active Learning of Non-semantic Speech Tasks with Pretrained Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+H">Harlin Lee</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Bertozzi%2C+A+L">Andrea L. Bertozzi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00119v4-abstract-short" style="display: inline;"> Pretraining neural networks with massive unlabeled datasets has become popular as it equips the deep models with a better prior to solve downstream tasks. However, this approach generally assumes that the downstream tasks have access to annotated data of sufficient size. In this work, we propose ALOE, a novel system for improving the data- and label-efficiency of non-semantic speech tasks with act… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00119v4-abstract-full').style.display = 'inline'; document.getElementById('2211.00119v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00119v4-abstract-full" style="display: none;"> Pretraining neural networks with massive unlabeled datasets has become popular as it equips the deep models with a better prior to solve downstream tasks. However, this approach generally assumes that the downstream tasks have access to annotated data of sufficient size. In this work, we propose ALOE, a novel system for improving the data- and label-efficiency of non-semantic speech tasks with active learning. ALOE uses pretrained models in conjunction with active learning to label data incrementally and learn classifiers for downstream tasks, thereby mitigating the need to acquire labeled data beforehand. We demonstrate the effectiveness of ALOE on a wide range of tasks, uncertainty-based acquisition functions, and model architectures. Training a linear classifier on top of a frozen encoder with ALOE is shown to achieve performance similar to several baselines that utilize the entire labeled data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00119v4-abstract-full').style.display = 'none'; document.getElementById('2211.00119v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at: ICASSP'23, Code: https://github.com/HarlinLee/ALOE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.15283">arXiv:2210.15283</a> <span> [<a href="https://arxiv.org/pdf/2210.15283">pdf</a>, <a href="https://arxiv.org/format/2210.15283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> On Out-of-Distribution Detection for Audio with Deep Nearest Neighbors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bukhsh%2C+Z">Zaharah Bukhsh</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.15283v2-abstract-short" style="display: inline;"> Out-of-distribution (OOD) detection is concerned with identifying data points that do not belong to the same distribution as the model's training data. For the safe deployment of predictive models in a real-world environment, it is critical to avoid making confident predictions on OOD inputs as it can lead to potentially dangerous consequences. However, OOD detection largely remains an under-explo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.15283v2-abstract-full').style.display = 'inline'; document.getElementById('2210.15283v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.15283v2-abstract-full" style="display: none;"> Out-of-distribution (OOD) detection is concerned with identifying data points that do not belong to the same distribution as the model's training data. For the safe deployment of predictive models in a real-world environment, it is critical to avoid making confident predictions on OOD inputs as it can lead to potentially dangerous consequences. However, OOD detection largely remains an under-explored area in the audio (and speech) domain. This is despite the fact that audio is a central modality for many tasks, such as speaker diarization, automatic speech recognition, and sound event detection. To address this, we propose to leverage feature-space of the model with deep k-nearest neighbors to detect OOD samples. We show that this simple and flexible method effectively detects OOD inputs across a broad category of audio (and speech) datasets. Specifically, it improves the false positive rate (FPR@TPR95) by 17% and the AUROC score by 7% than other prior techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.15283v2-abstract-full').style.display = 'none'; document.getElementById('2210.15283v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICASSP'23. Webpage: https://zaharah.github.io/ood_audio, Code: https://github.com/Zaharah/ood_audio</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.09378">arXiv:2208.09378</a> <span> [<a href="https://arxiv.org/pdf/2208.09378">pdf</a>, <a href="https://arxiv.org/format/2208.09378">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Labeling Chaos to Learning Harmony: Federated Learning with Noisy Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tsouvalas%2C+V">Vasileios Tsouvalas</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Ozcelebi%2C+T">Tanir Ozcelebi</a>, <a href="/search/cs?searchtype=author&query=Meratnia%2C+N">Nirvana Meratnia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.09378v3-abstract-short" style="display: inline;"> Federated Learning (FL) is a distributed machine learning paradigm that enables learning models from decentralized private datasets, where the labeling effort is entrusted to the clients. While most existing FL approaches assume high-quality labels are readily available on users' devices; in reality, label noise can naturally occur in FL and is closely related to clients' characteristics. Due to s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09378v3-abstract-full').style.display = 'inline'; document.getElementById('2208.09378v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.09378v3-abstract-full" style="display: none;"> Federated Learning (FL) is a distributed machine learning paradigm that enables learning models from decentralized private datasets, where the labeling effort is entrusted to the clients. While most existing FL approaches assume high-quality labels are readily available on users' devices; in reality, label noise can naturally occur in FL and is closely related to clients' characteristics. Due to scarcity of available data and significant label noise variations among clients in FL, existing state-of-the-art centralized approaches exhibit unsatisfactory performance, while prior FL studies rely on excessive on-device computational schemes or additional clean data available on server. Here, we propose FedLN, a framework to deal with label noise across different FL training stages; namely, FL initialization, on-device model training, and server model aggregation, able to accommodate the diverse computational capabilities of devices in a FL system. Specifically, FedLN computes per-client noise-level estimation in a single federated round and improves the models' performance by either correcting or mitigating the effect of noisy samples. Our evaluation on various publicly available vision and audio datasets demonstrate a 22% improvement on average compared to other existing methods for a label noise level of 60%. We further validate the efficiency of FedLN in human-annotated real-world noisy datasets and report a 4.8% increase on average in models' recognition performance, highlighting that~\method~can be useful for improving FL services provided to everyday users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09378v3-abstract-full').style.display = 'none'; document.getElementById('2208.09378v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.00467">arXiv:2208.00467</a> <span> [<a href="https://arxiv.org/pdf/2208.00467">pdf</a>, <a href="https://arxiv.org/format/2208.00467">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3550316">10.1145/3550316 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> COCOA: Cross Modality Contrastive Learning for Sensor Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deldari%2C+S">Shohreh Deldari</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+H">Hao Xue</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+D+V">Daniel V. Smith</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D. Salim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.00467v2-abstract-short" style="display: inline;"> Self-Supervised Learning (SSL) is a new paradigm for learning discriminative representations without labelled data and has reached comparable or even state-of-the-art results in comparison to supervised counterparts. Contrastive Learning (CL) is one of the most well-known approaches in SSL that attempts to learn general, informative representations of data. CL methods have been mostly developed fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.00467v2-abstract-full').style.display = 'inline'; document.getElementById('2208.00467v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.00467v2-abstract-full" style="display: none;"> Self-Supervised Learning (SSL) is a new paradigm for learning discriminative representations without labelled data and has reached comparable or even state-of-the-art results in comparison to supervised counterparts. Contrastive Learning (CL) is one of the most well-known approaches in SSL that attempts to learn general, informative representations of data. CL methods have been mostly developed for applications in computer vision and natural language processing where only a single sensor modality is used. A majority of pervasive computing applications, however, exploit data from a range of different sensor modalities. While existing CL methods are limited to learning from one or two data sources, we propose COCOA (Cross mOdality COntrastive leArning), a self-supervised model that employs a novel objective function to learn quality representations from multisensor data by computing the cross-correlation between different data modalities and minimizing the similarity between irrelevant instances. We evaluate the effectiveness of COCOA against eight recently introduced state-of-the-art self-supervised models, and two supervised baselines across five public datasets. We show that COCOA achieves superior classification performance to all other approaches. Also, COCOA is far more label-efficient than the other baselines including the fully supervised model using only one-tenth of available labelled data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.00467v2-abstract-full').style.display = 'none'; document.getElementById('2208.00467v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 10 figures, 6 tables, Accepted with minor revision at IMWUT Vol. 6 No. 3</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.06921">arXiv:2207.06921</a> <span> [<a href="https://arxiv.org/pdf/2207.06921">pdf</a>, <a href="https://arxiv.org/format/2207.06921">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Automatic Sleep Scoring from Large-scale Multi-channel Pediatric EEG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+H">Harlin Lee</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.06921v4-abstract-short" style="display: inline;"> Sleep is particularly important to the health of infants, children, and adolescents, and sleep scoring is the first step to accurate diagnosis and treatment of potentially life-threatening conditions. But pediatric sleep is severely under-researched compared to adult sleep in the context of machine learning for health, and sleep scoring algorithms developed for adults usually perform poorly on inf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06921v4-abstract-full').style.display = 'inline'; document.getElementById('2207.06921v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.06921v4-abstract-full" style="display: none;"> Sleep is particularly important to the health of infants, children, and adolescents, and sleep scoring is the first step to accurate diagnosis and treatment of potentially life-threatening conditions. But pediatric sleep is severely under-researched compared to adult sleep in the context of machine learning for health, and sleep scoring algorithms developed for adults usually perform poorly on infants. Here, we present the first automated sleep scoring results on a recent large-scale pediatric sleep study dataset that was collected during standard clinical care. We develop a transformer-based model that learns to classify five sleep stages from millions of multi-channel electroencephalogram (EEG) sleep epochs with 78% overall accuracy. Further, we conduct an in-depth analysis of the model performance based on patient demographics and EEG channels. The results point to the growing need for machine learning research on pediatric sleep. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06921v4-abstract-full').style.display = 'none'; document.getElementById('2207.06921v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Learning from Time Series for Health. Workshop at NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.05784">arXiv:2207.05784</a> <span> [<a href="https://arxiv.org/pdf/2207.05784">pdf</a>, <a href="https://arxiv.org/format/2207.05784">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.patrec.2023.11.028">10.1016/j.patrec.2023.11.028 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Distilled Non-Semantic Speech Embeddings with Binary Neural Networks for Low-Resource Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+H">Harlin Lee</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.05784v4-abstract-short" style="display: inline;"> This work introduces BRILLsson, a novel binary neural network-based representation learning model for a broad range of non-semantic speech tasks. We train the model with knowledge distillation from a large and real-valued TRILLsson model with only a fraction of the dataset used to train TRILLsson. The resulting BRILLsson models are only 2MB in size with a latency less than 8ms, making them suitabl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.05784v4-abstract-full').style.display = 'inline'; document.getElementById('2207.05784v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.05784v4-abstract-full" style="display: none;"> This work introduces BRILLsson, a novel binary neural network-based representation learning model for a broad range of non-semantic speech tasks. We train the model with knowledge distillation from a large and real-valued TRILLsson model with only a fraction of the dataset used to train TRILLsson. The resulting BRILLsson models are only 2MB in size with a latency less than 8ms, making them suitable for deployment in low-resource devices such as wearables. We evaluate BRILLsson on eight benchmark tasks (including but not limited to spoken language identification, emotion recognition, health condition diagnosis, and keyword spotting), and demonstrate that our proposed ultra-light and low-latency models perform as well as large-scale models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.05784v4-abstract-full').style.display = 'none'; document.getElementById('2207.05784v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Pattern Recognition Letters, vol. 177, pp. 15-19, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.09029">arXiv:2206.09029</a> <span> [<a href="https://arxiv.org/pdf/2206.09029">pdf</a>, <a href="https://arxiv.org/format/2206.09029">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Binary Early-Exit Network for Adaptive Inference on Low-Resource Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.09029v1-abstract-short" style="display: inline;"> Deep neural networks have significantly improved performance on a range of tasks with the increasing demand for computational resources, leaving deployment on low-resource devices (with limited memory and battery power) infeasible. Binary neural networks (BNNs) tackle the issue to an extent with extreme compression and speed-up gains compared to real-valued models. We propose a simple but effectiv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.09029v1-abstract-full').style.display = 'inline'; document.getElementById('2206.09029v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.09029v1-abstract-full" style="display: none;"> Deep neural networks have significantly improved performance on a range of tasks with the increasing demand for computational resources, leaving deployment on low-resource devices (with limited memory and battery power) infeasible. Binary neural networks (BNNs) tackle the issue to an extent with extreme compression and speed-up gains compared to real-valued models. We propose a simple but effective method to accelerate inference through unifying BNNs with an early-exiting strategy. Our approach allows simple instances to exit early based on a decision threshold and utilizes output layers added to different intermediate layers to avoid executing the entire binary model. We extensively evaluate our method on three audio classification tasks and across four BNNs architectures. Our method demonstrates favorable quality-efficiency trade-offs while being controllable with an entropy-based threshold specified by the system user. It also results in better speed-ups (latency less than 6ms) with a single model based on existing BNN architectures without retraining for different efficiency levels. It also provides a straightforward way to estimate sample difficulty and better understanding of uncertainty around certain classes within the dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.09029v1-abstract-full').style.display = 'none'; document.getElementById('2206.09029v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Interspeech 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.08429">arXiv:2206.08429</a> <span> [<a href="https://arxiv.org/pdf/2206.08429">pdf</a>, <a href="https://arxiv.org/format/2206.08429">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scalable Temporal Localization of Sensitive Activities in Movies and TV Episodes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hao%2C+X">Xiang Hao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingxiang Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shixing Chen</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+A">Ahmed Saad</a>, <a href="/search/cs?searchtype=author&query=Hamid%2C+R">Raffay Hamid</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.08429v1-abstract-short" style="display: inline;"> To help customers make better-informed viewing choices, video-streaming services try to moderate their content and provide more visibility into which portions of their movies and TV episodes contain age-appropriate material (e.g., nudity, sex, violence, or drug-use). Supervised models to localize these sensitive activities require large amounts of clip-level labeled data which is hard to obtain, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.08429v1-abstract-full').style.display = 'inline'; document.getElementById('2206.08429v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.08429v1-abstract-full" style="display: none;"> To help customers make better-informed viewing choices, video-streaming services try to moderate their content and provide more visibility into which portions of their movies and TV episodes contain age-appropriate material (e.g., nudity, sex, violence, or drug-use). Supervised models to localize these sensitive activities require large amounts of clip-level labeled data which is hard to obtain, while weakly-supervised models to this end usually do not offer competitive accuracy. To address this challenge, we propose a novel Coarse2Fine network designed to make use of readily obtainable video-level weak labels in conjunction with sparse clip-level labels of age-appropriate activities. Our model aggregates frame-level predictions to make video-level classifications and is therefore able to leverage sparse clip-level labels along with video-level labels. Furthermore, by performing frame-level predictions in a hierarchical manner, our approach is able to overcome the label-imbalance problem caused due to the rare-occurrence nature of age-appropriate content. We present comparative results of our approach using 41,234 movies and TV episodes (~3 years of video-content) from 521 sub-genres and 250 countries making it by far the largest-scale empirical analysis of age-appropriate activity localization in long-form videos ever published. Our approach offers 107.2% relative mAP improvement (from 5.5% to 11.4%) over existing state-of-the-art activity-localization approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.08429v1-abstract-full').style.display = 'none'; document.getElementById('2206.08429v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.02353">arXiv:2206.02353</a> <span> [<a href="https://arxiv.org/pdf/2206.02353">pdf</a>, <a href="https://arxiv.org/format/2206.02353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Beyond Just Vision: A Review on Self-Supervised Representation Learning on Multimodal and Temporal Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deldari%2C+S">Shohreh Deldari</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+H">Hao Xue</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Jiayuan He</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+D+V">Daniel V. Smith</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D. Salim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.02353v2-abstract-short" style="display: inline;"> Recently, Self-Supervised Representation Learning (SSRL) has attracted much attention in the field of computer vision, speech, natural language processing (NLP), and recently, with other types of modalities, including time series from sensors. The popularity of self-supervised learning is driven by the fact that traditional models typically require a huge amount of well-annotated data for training… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02353v2-abstract-full').style.display = 'inline'; document.getElementById('2206.02353v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.02353v2-abstract-full" style="display: none;"> Recently, Self-Supervised Representation Learning (SSRL) has attracted much attention in the field of computer vision, speech, natural language processing (NLP), and recently, with other types of modalities, including time series from sensors. The popularity of self-supervised learning is driven by the fact that traditional models typically require a huge amount of well-annotated data for training. Acquiring annotated data can be a difficult and costly process. Self-supervised methods have been introduced to improve the efficiency of training data through discriminative pre-training of models using supervisory signals that have been freely obtained from the raw data. Unlike existing reviews of SSRL that have pre-dominately focused upon methods in the fields of CV or NLP for a single modality, we aim to provide the first comprehensive review of multimodal self-supervised learning methods for temporal data. To this end, we 1) provide a comprehensive categorization of existing SSRL methods, 2) introduce a generic pipeline by defining the key components of a SSRL framework, 3) compare existing models in terms of their objective function, network architecture and potential applications, and 4) review existing multimodal techniques in each category and various modalities. Finally, we present existing weaknesses and future opportunities. We believe our work develops a perspective on the requirements of SSRL in domains that utilise multimodal and/or temporal data <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02353v2-abstract-full').style.display = 'none'; document.getElementById('2206.02353v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages, 5 figures, 9 tables, Survey paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.09660">arXiv:2204.09660</a> <span> [<a href="https://arxiv.org/pdf/2204.09660">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.dib.2022.108089">10.1016/j.dib.2022.108089 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Medical Dataset Classification for Kurdish Short Text over Social Media </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+A+M">Ari M. Saeed</a>, <a href="/search/cs?searchtype=author&query=Hussein%2C+S+R">Shnya R. Hussein</a>, <a href="/search/cs?searchtype=author&query=Ali%2C+C+M">Chro M. Ali</a>, <a href="/search/cs?searchtype=author&query=Rashid%2C+T+A">Tarik A. Rashid</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.09660v1-abstract-short" style="display: inline;"> The Facebook application is used as a resource for collecting the comments of this dataset, The dataset consists of 6756 comments to create a Medical Kurdish Dataset (MKD). The samples are comments of users, which are gathered from different posts of pages (Medical, News, Economy, Education, and Sport). Six steps as a preprocessing technique are performed on the raw dataset to clean and remove noi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09660v1-abstract-full').style.display = 'inline'; document.getElementById('2204.09660v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.09660v1-abstract-full" style="display: none;"> The Facebook application is used as a resource for collecting the comments of this dataset, The dataset consists of 6756 comments to create a Medical Kurdish Dataset (MKD). The samples are comments of users, which are gathered from different posts of pages (Medical, News, Economy, Education, and Sport). Six steps as a preprocessing technique are performed on the raw dataset to clean and remove noise in the comments by replacing characters. The comments (short text) are labeled for positive class (medical comment) and negative class (non-medical comment) as text classification. The percentage ratio of the negative class is 55% while the positive class is 45%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09660v1-abstract-full').style.display = 'none'; document.getElementById('2204.09660v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> DIB, 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.12363">arXiv:2202.12363</a> <span> [<a href="https://arxiv.org/pdf/2202.12363">pdf</a>, <a href="https://arxiv.org/format/2202.12363">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Estimators of Entropy and Information via Inference in Probabilistic Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saad%2C+F+A">Feras A. Saad</a>, <a href="/search/cs?searchtype=author&query=Cusumano-Towner%2C+M">Marco Cusumano-Towner</a>, <a href="/search/cs?searchtype=author&query=Mansinghka%2C+V+K">Vikash K. Mansinghka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.12363v4-abstract-short" style="display: inline;"> Estimating information-theoretic quantities such as entropy and mutual information is central to many problems in statistics and machine learning, but challenging in high dimensions. This paper presents estimators of entropy via inference (EEVI), which deliver upper and lower bounds on many information quantities for arbitrary variables in a probabilistic generative model. These estimators use imp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12363v4-abstract-full').style.display = 'inline'; document.getElementById('2202.12363v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.12363v4-abstract-full" style="display: none;"> Estimating information-theoretic quantities such as entropy and mutual information is central to many problems in statistics and machine learning, but challenging in high dimensions. This paper presents estimators of entropy via inference (EEVI), which deliver upper and lower bounds on many information quantities for arbitrary variables in a probabilistic generative model. These estimators use importance sampling with proposal distribution families that include amortized variational inference and sequential Monte Carlo, which can be tailored to the target model and used to squeeze true information values with high accuracy. We present several theoretical properties of EEVI and demonstrate scalability and efficacy on two problems from the medical domain: (i) in an expert system for diagnosing liver disorders, we rank medical tests according to how informative they are about latent diseases, given a pattern of observed symptoms and patient attributes; and (ii) in a differential equation model of carbohydrate metabolism, we find optimal times to take blood glucose measurements that maximize information about a diabetic patient's insulin sensitivity, given their meal and medication schedule. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12363v4-abstract-full').style.display = 'none'; document.getElementById('2202.12363v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 8 figures. Appearing in AISTATS 2022</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 25th International Conference on Artificial Intelligence and Statistics, PMLR 151:5604-5621, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.10623">arXiv:2201.10623</a> <span> [<a href="https://arxiv.org/pdf/2201.10623">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s00500-021-06691-4">10.1007/s00500-021-06691-4 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A comprehensive review and evaluation on text predictive and entertainment systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hamarashid%2C+H+K">Hozan K. Hamarashid</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+S+A">Soran A. Saeed</a>, <a href="/search/cs?searchtype=author&query=Rashid%2C+T+A">Tarik A. Rashid</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.10623v1-abstract-short" style="display: inline;"> One of the most important ways to experience communication and interact with the systems is by handling the prediction of the most likely words to happen after typing letters or words. It is helpful for people with disabilities due to disabling people who could type or enter texts at a limited slow speed. Also, it is beneficial for people with dyslexia and those people who are not well with spells… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.10623v1-abstract-full').style.display = 'inline'; document.getElementById('2201.10623v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.10623v1-abstract-full" style="display: none;"> One of the most important ways to experience communication and interact with the systems is by handling the prediction of the most likely words to happen after typing letters or words. It is helpful for people with disabilities due to disabling people who could type or enter texts at a limited slow speed. Also, it is beneficial for people with dyslexia and those people who are not well with spells of words. Though, an input technology, for instance, the next word suggestion facilitates the typing process in smartphones as an example. This means that when a user types a word, then the system suggests the next words to be chosen in which the necessary word by the user. Besides, it can be used in entertainment as a gam, for example, to determine a target word and reach it or tackle it within 10 attempts of prediction. Generally, the systems depend on a text corpus, which was provided in the system to conduct the prediction. Writing every single word is time-consuming, therefore, it is vitally important to decrease time consumption by reducing efforts to input texts in the systems by offering most probable words for the user to select, this could be done via next word prediction systems. There are several techniques can be found in literature, which is utilized to conduct a variety of next word prediction systems by using different approaches. In this paper, a survey of miscellaneous techniques towards the next word prediction systems will be addressed. Besides, the evaluation of the prediction systems will be discussed. Then, a modal technique will be determined to be utilized for the next word prediction system from the perspective of easiness of implementation and obtaining a good result. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.10623v1-abstract-full').style.display = 'none'; document.getElementById('2201.10623v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Soft computing, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.00725">arXiv:2112.00725</a> <span> [<a href="https://arxiv.org/pdf/2112.00725">pdf</a>, <a href="https://arxiv.org/format/2112.00725">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> The Augmented Image Prior: Distilling 1000 Classes by Extrapolating from a Single Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Asano%2C+Y+M">Yuki M. Asano</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.00725v4-abstract-short" style="display: inline;"> What can neural networks learn about the visual world when provided with only a single image as input? While any image obviously cannot contain the multitudes of all existing objects, scenes and lighting conditions - within the space of all 256^(3x224x224) possible 224-sized square images, it might still provide a strong prior for natural images. To analyze this `augmented image prior' hypothesis,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.00725v4-abstract-full').style.display = 'inline'; document.getElementById('2112.00725v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.00725v4-abstract-full" style="display: none;"> What can neural networks learn about the visual world when provided with only a single image as input? While any image obviously cannot contain the multitudes of all existing objects, scenes and lighting conditions - within the space of all 256^(3x224x224) possible 224-sized square images, it might still provide a strong prior for natural images. To analyze this `augmented image prior' hypothesis, we develop a simple framework for training neural networks from scratch using a single image and augmentations using knowledge distillation from a supervised pretrained teacher. With this, we find the answer to the above question to be: `surprisingly, a lot'. In quantitative terms, we find accuracies of 94%/74% on CIFAR-10/100, 69% on ImageNet, and by extending this method to video and audio, 51% on Kinetics-400 and 84% on SpeechCommands. In extensive analyses spanning 13 datasets, we disentangle the effect of augmentations, choice of data and network architectures and also provide qualitative evaluations that include lucid `panda neurons' in networks that have never even seen one. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.00725v4-abstract-full').style.display = 'none'; document.getElementById('2112.00725v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICLR'23. Webpage: https://single-image-distill.github.io/, code: https://github.com/yukimasano/single-img-extrapolating</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.13192">arXiv:2109.13192</a> <span> [<a href="https://arxiv.org/pdf/2109.13192">pdf</a>, <a href="https://arxiv.org/format/2109.13192">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Consistency Training of Multi-exit Architectures for Sensor Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.13192v1-abstract-short" style="display: inline;"> Deep neural networks have become larger over the years with increasing demand of computational resources for inference; incurring exacerbate costs and leaving little room for deployment on devices with limited battery and other resources for real-time applications. The multi-exit architectures are type of deep neural network that are interleaved with several output (or exit) layers at varying dept… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.13192v1-abstract-full').style.display = 'inline'; document.getElementById('2109.13192v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.13192v1-abstract-full" style="display: none;"> Deep neural networks have become larger over the years with increasing demand of computational resources for inference; incurring exacerbate costs and leaving little room for deployment on devices with limited battery and other resources for real-time applications. The multi-exit architectures are type of deep neural network that are interleaved with several output (or exit) layers at varying depths of the model. They provide a sound approach for improving computational time and energy utilization of running a model through producing predictions from early exits. In this work, we present a novel and architecture-agnostic approach for robust training of multi-exit architectures termed consistent exit training. The crux of the method lies in a consistency-based objective to enforce prediction invariance over clean and perturbed inputs. We leverage weak supervision to align model output with consistency training and jointly optimize dual-losses in a multi-task learning fashion over the exits in a network. Our technique enables exit layers to generalize better when confronted with increasing uncertainty, hence, resulting in superior quality-efficiency trade-offs. We demonstrate through extensive evaluation on challenging learning tasks involving sensor data that our approach allows examples to exit earlier with better detection rate and without executing all the layers in a deep model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.13192v1-abstract-full').style.display = 'none'; document.getElementById('2109.13192v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.12811">arXiv:2108.12811</a> <span> [<a href="https://arxiv.org/pdf/2108.12811">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Airplane Type Identification Based on Mask RCNN and Drone Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Alshaibani%2C+W+T">W. T Alshaibani</a>, <a href="/search/cs?searchtype=author&query=Helvaci%2C+M">Mustafa Helvaci</a>, <a href="/search/cs?searchtype=author&query=Shayea%2C+I">Ibraheem Shayea</a>, <a href="/search/cs?searchtype=author&query=Saad%2C+S+A">Sawsan A. Saad</a>, <a href="/search/cs?searchtype=author&query=Azizan%2C+A">Azizul Azizan</a>, <a href="/search/cs?searchtype=author&query=Yakub%2C+F">Fitri Yakub</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.12811v1-abstract-short" style="display: inline;"> For dealing with traffic bottlenecks at airports, aircraft object detection is insufficient. Every airport generally has a variety of planes with various physical and technological requirements as well as diverse service requirements. Detecting the presence of new planes will not address all traffic congestion issues. Identifying the type of airplane, on the other hand, will entirely fix the probl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.12811v1-abstract-full').style.display = 'inline'; document.getElementById('2108.12811v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.12811v1-abstract-full" style="display: none;"> For dealing with traffic bottlenecks at airports, aircraft object detection is insufficient. Every airport generally has a variety of planes with various physical and technological requirements as well as diverse service requirements. Detecting the presence of new planes will not address all traffic congestion issues. Identifying the type of airplane, on the other hand, will entirely fix the problem because it will offer important information about the plane's technical specifications (i.e., the time it needs to be served and its appropriate place in the airport). Several studies have provided various contributions to address airport traffic jams; however, their ultimate goal was to determine the existence of airplane objects. This paper provides a practical approach to identify the type of airplane in airports depending on the results provided by the airplane detection process using mask region convolution neural network. The key feature employed to identify the type of airplane is the surface area calculated based on the results of airplane detection. The surface area is used to assess the estimated cabin length which is considered as an additional key feature for identifying the airplane type. The length of any detected plane may be calculated by measuring the distance between the detected plane's two furthest points. The suggested approach's performance is assessed using average accuracies and a confusion matrix. The findings show that this method is dependable. This method will greatly aid in the management of airport traffic congestion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.12811v1-abstract-full').style.display = 'none'; document.getElementById('2108.12811v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 page</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.07208">arXiv:2108.07208</a> <span> [<a href="https://arxiv.org/pdf/2108.07208">pdf</a>, <a href="https://arxiv.org/format/2108.07208">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Infinite Relational Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saad%2C+F+A">Feras A. Saad</a>, <a href="/search/cs?searchtype=author&query=Mansinghka%2C+V+K">Vikash K. Mansinghka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.07208v1-abstract-short" style="display: inline;"> This paper describes the hierarchical infinite relational model (HIRM), a new probabilistic generative model for noisy, sparse, and heterogeneous relational data. Given a set of relations defined over a collection of domains, the model first infers multiple non-overlapping clusters of relations using a top-level Chinese restaurant process. Within each cluster of relations, a Dirichlet process mixt… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.07208v1-abstract-full').style.display = 'inline'; document.getElementById('2108.07208v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.07208v1-abstract-full" style="display: none;"> This paper describes the hierarchical infinite relational model (HIRM), a new probabilistic generative model for noisy, sparse, and heterogeneous relational data. Given a set of relations defined over a collection of domains, the model first infers multiple non-overlapping clusters of relations using a top-level Chinese restaurant process. Within each cluster of relations, a Dirichlet process mixture is then used to partition the domain entities and model the probability distribution of relation values. The HIRM generalizes the standard infinite relational model and can be used for a variety of data analysis tasks including dependence detection, clustering, and density estimation. We present new algorithms for fully Bayesian posterior inference via Gibbs sampling. We illustrate the efficacy of the method on a density estimation benchmark of twenty object-attribute datasets with up to 18 million cells and use it to discover relational structure in real-world datasets from politics and genomics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.07208v1-abstract-full').style.display = 'none'; document.getElementById('2108.07208v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 6 figures, 4 tables. Appearing in UAI 2021</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 37th Conference on Uncertainty in Artificial Intelligence, PMLR 161:1067-1077, 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.06877">arXiv:2107.06877</a> <span> [<a href="https://arxiv.org/pdf/2107.06877">pdf</a>, <a href="https://arxiv.org/format/2107.06877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Federated Self-Training for Semi-Supervised Audio Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tsouvalas%2C+V">Vasileios Tsouvalas</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Aaqib Saeed</a>, <a href="/search/cs?searchtype=author&query=Ozcelebi%2C+T">Tanir Ozcelebi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.06877v2-abstract-short" style="display: inline;"> Federated Learning is a distributed machine learning paradigm dealing with decentralized and personal datasets. Since data reside on devices like smartphones and virtual assistants, labeling is entrusted to the clients, or labels are extracted in an automated way. Specifically, in the case of audio data, acquiring semantic annotations can be prohibitively expensive and time-consuming. As a result,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06877v2-abstract-full').style.display = 'inline'; document.getElementById('2107.06877v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.06877v2-abstract-full" style="display: none;"> Federated Learning is a distributed machine learning paradigm dealing with decentralized and personal datasets. Since data reside on devices like smartphones and virtual assistants, labeling is entrusted to the clients, or labels are extracted in an automated way. Specifically, in the case of audio data, acquiring semantic annotations can be prohibitively expensive and time-consuming. As a result, an abundance of audio data remains unlabeled and unexploited on users' devices. Most existing federated learning approaches focus on supervised learning without harnessing the unlabeled data. In this work, we study the problem of semi-supervised learning of audio models via self-training in conjunction with federated learning. We propose FedSTAR to exploit large-scale on-device unlabeled data to improve the generalization of audio recognition models. We further demonstrate that self-supervised pre-trained models can accelerate the training of on-device models, significantly improving convergence to within fewer training rounds. We conduct experiments on diverse public audio classification datasets and investigate the performance of our models under varying percentages of labeled and unlabeled data. Notably, we show that with as little as 3% labeled data available, FedSTAR on average can improve the recognition rate by 13.28% compared to the fully supervised federated model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06877v2-abstract-full').style.display = 'none'; document.getElementById('2107.06877v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.11999">arXiv:2105.11999</a> <span> [<a href="https://arxiv.org/pdf/2105.11999">pdf</a>, <a href="https://arxiv.org/format/2105.11999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Throughput-Fairness Tradeoffs in Mobility Platforms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balasingam%2C+A">Arjun Balasingam</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+K">Karthik Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Mittal%2C+R">Radhika Mittal</a>, <a href="/search/cs?searchtype=author&query=Arun%2C+V">Venkat Arun</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Ahmed Saeed</a>, <a href="/search/cs?searchtype=author&query=Alizadeh%2C+M">Mohammad Alizadeh</a>, <a href="/search/cs?searchtype=author&query=Balakrishnan%2C+H">Hamsa Balakrishnan</a>, <a href="/search/cs?searchtype=author&query=Balakrishnan%2C+H">Hari Balakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.11999v1-abstract-short" style="display: inline;"> This paper studies the problem of allocating tasks from different customers to vehicles in mobility platforms, which are used for applications like food and package delivery, ridesharing, and mobile sensing. A mobility platform should allocate tasks to vehicles and schedule them in order to optimize both throughput and fairness across customers. However, existing approaches to scheduling tasks in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.11999v1-abstract-full').style.display = 'inline'; document.getElementById('2105.11999v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.11999v1-abstract-full" style="display: none;"> This paper studies the problem of allocating tasks from different customers to vehicles in mobility platforms, which are used for applications like food and package delivery, ridesharing, and mobile sensing. A mobility platform should allocate tasks to vehicles and schedule them in order to optimize both throughput and fairness across customers. However, existing approaches to scheduling tasks in mobility platforms ignore fairness. We introduce Mobius, a system that uses guided optimization to achieve both high throughput and fairness across customers. Mobius supports spatiotemporally diverse and dynamic customer demands. It provides a principled method to navigate inherent tradeoffs between fairness and throughput caused by shared mobility. Our evaluation demonstrates these properties, along with the versatility and scalability of Mobius, using traces gathered from ridesharing and aerial sensing applications. Our ridesharing case study shows that Mobius can schedule more than 16,000 tasks across 40 customers and 200 vehicles in an online manner. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.11999v1-abstract-full').style.display = 'none'; document.getElementById('2105.11999v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Technical report for paper to appear at ACM MobiSys 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.02697">arXiv:2105.02697</a> <span> [<a href="https://arxiv.org/pdf/2105.02697">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.24017/science.2016.1.2.2">10.24017/science.2016.1.2.2 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Evaluating e-Government Services in Kurdistan Institution for Strategic Studies and Scientific Research Using the EGOVSAT Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hassan%2C+B+A">Bryar A. Hassan</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+A+M">Aram M. Ahmed</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+S+A">Soran A. Saeed</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A+A">Awin A. Saeed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.02697v1-abstract-short" style="display: inline;"> Office automation is an initiative used to digitally deliver services to citizens, private and public sectors. It is used to digitally collect, store, create, and manipulate office information as a need of accomplishing basic tasks. Azya Office Automation has been implemented as a pilot project in Kurdistan Institution for Strategic Studies and Scientific Research (KISSR) since 2013. The efficienc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.02697v1-abstract-full').style.display = 'inline'; document.getElementById('2105.02697v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.02697v1-abstract-full" style="display: none;"> Office automation is an initiative used to digitally deliver services to citizens, private and public sectors. It is used to digitally collect, store, create, and manipulate office information as a need of accomplishing basic tasks. Azya Office Automation has been implemented as a pilot project in Kurdistan Institution for Strategic Studies and Scientific Research (KISSR) since 2013. The efficiency of governance in Kurdistan Institution for Strategic Studies and Scientific Research has been improved, thanks to its implementation. The aims of this research paper is to evaluate user satisfaction of this software and identify its significant predictors using EGOVSAT Model. The user satisfaction of this model encompasses five main parts, which are utility, reliability, efficiency, customization, and flexibility. For that purpose, a detailed survey is conducted to measure the level of user satisfaction. A total of sixteen questions have distributed among forty one users of the software in KISSR. In order to evaluate the software, three measurement have been used which are reliability test, regression analysis and correlation analysis. The results indicate that the software is successful to a decent extent based on user satisfaction feedbacks obtained by using EGOVSAT Model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.02697v1-abstract-full').style.display = 'none'; document.getElementById('2105.02697v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.13774">arXiv:2104.13774</a> <span> [<a href="https://arxiv.org/pdf/2104.13774">pdf</a>, <a href="https://arxiv.org/format/2104.13774">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-72582-2_20">10.1007/978-3-030-72582-2_20 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Scouting the Path to a Million-Client Server </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yimeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Saeed%2C+A">Ahmed Saeed</a>, <a href="/search/cs?searchtype=author&query=Ammar%2C+M">Mostafa Ammar</a>, <a href="/search/cs?searchtype=author&query=Zegura%2C+E">Ellen Zegura</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.13774v2-abstract-short" style="display: inline;"> To keep up with demand, servers will scale up to handle hundreds of thousands of clients simultaneously. Much of the focus of the community has been on scaling servers in terms of aggregate traffic intensity (packets transmitted per second). However, bottlenecks caused by the increasing number of concurrent clients, resulting in a large number of concurrent flows, have received little attention. I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.13774v2-abstract-full').style.display = 'inline'; document.getElementById('2104.13774v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.13774v2-abstract-full" style="display: none;"> To keep up with demand, servers will scale up to handle hundreds of thousands of clients simultaneously. Much of the focus of the community has been on scaling servers in terms of aggregate traffic intensity (packets transmitted per second). However, bottlenecks caused by the increasing number of concurrent clients, resulting in a large number of concurrent flows, have received little attention. In this work, we focus on identifying such bottlenecks. In particular, we define two broad categories of problems; namely, admitting more packets into the network stack than can be handled efficiently, and increasing per-packet overhead within the stack. We show that these problems contribute to high CPU usage and network performance degradation in terms of aggregate throughput and RTT. Our measurement and analysis are performed in the context of the Linux networking stack, the the most widely used publicly available networking stack. Further, we discuss the relevance of our findings to other network stacks. The goal of our work is to highlight considerations required in the design of future networking stacks to enable efficient handling of large numbers of clients and flows. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.13774v2-abstract-full').style.display = 'none'; document.getElementById('2104.13774v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In: Hohlfeld O., Lutu A., Levin D. (eds) Passive and Active Measurement. PAM 2021. Lecture Notes in Computer Science, vol 12671. Springer, Cham </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Saeed%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Saeed%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Saeed%2C+A&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>