Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 87 results for author: <span class="mathjax">Reddy, A</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Reddy%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Reddy, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Reddy%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Reddy, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Reddy%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Reddy%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Reddy%2C+A&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.20698">arXiv:2503.20698</a> <span> [<a href="https://arxiv.org/pdf/2503.20698">pdf</a>, <a href="https://arxiv.org/format/2503.20698">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> MMMORRF: Multimodal Multilingual Modularized Reciprocal Rank Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Samuel%2C+S">Saron Samuel</a>, <a href="/search/cs?searchtype=author&query=DeGenaro%2C+D">Dan DeGenaro</a>, <a href="/search/cs?searchtype=author&query=Guallar-Blasco%2C+J">Jimena Guallar-Blasco</a>, <a href="/search/cs?searchtype=author&query=Sanders%2C+K">Kate Sanders</a>, <a href="/search/cs?searchtype=author&query=Eisape%2C+O">Oluwaseun Eisape</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Arun Reddy</a>, <a href="/search/cs?searchtype=author&query=Martin%2C+A">Alexander Martin</a>, <a href="/search/cs?searchtype=author&query=Yates%2C+A">Andrew Yates</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+E">Eugene Yang</a>, <a href="/search/cs?searchtype=author&query=Carpenter%2C+C">Cameron Carpenter</a>, <a href="/search/cs?searchtype=author&query=Etter%2C+D">David Etter</a>, <a href="/search/cs?searchtype=author&query=Kayi%2C+E">Efsun Kayi</a>, <a href="/search/cs?searchtype=author&query=Wiesner%2C+M">Matthew Wiesner</a>, <a href="/search/cs?searchtype=author&query=Murray%2C+K">Kenton Murray</a>, <a href="/search/cs?searchtype=author&query=Kriz%2C+R">Reno Kriz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.20698v1-abstract-short" style="display: inline;"> Videos inherently contain multiple modalities, including visual events, text overlays, sounds, and speech, all of which are important for retrieval. However, state-of-the-art multimodal language models like VAST and LanguageBind are built on vision-language models (VLMs), and thus overly prioritize visual signals. Retrieval benchmarks further reinforce this bias by focusing on visual queries and n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20698v1-abstract-full').style.display = 'inline'; document.getElementById('2503.20698v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.20698v1-abstract-full" style="display: none;"> Videos inherently contain multiple modalities, including visual events, text overlays, sounds, and speech, all of which are important for retrieval. However, state-of-the-art multimodal language models like VAST and LanguageBind are built on vision-language models (VLMs), and thus overly prioritize visual signals. Retrieval benchmarks further reinforce this bias by focusing on visual queries and neglecting other modalities. We create a search system MMMORRF that extracts text and features from both visual and audio modalities and integrates them with a novel modality-aware weighted reciprocal rank fusion. MMMORRF is both effective and efficient, demonstrating practicality in searching videos based on users' information needs instead of visual descriptive queries. We evaluate MMMORRF on MultiVENT 2.0 and TVR, two multimodal benchmarks designed for more targeted information needs, and find that it improves nDCG@20 by 81% over leading multimodal encoders and 37% over single-modality retrieval, demonstrating the value of integrating diverse modalities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20698v1-abstract-full').style.display = 'none'; document.getElementById('2503.20698v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.19009">arXiv:2503.19009</a> <span> [<a href="https://arxiv.org/pdf/2503.19009">pdf</a>, <a href="https://arxiv.org/format/2503.19009">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Video-ColBERT: Contextualized Late Interaction for Text-to-Video Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Arun Reddy</a>, <a href="/search/cs?searchtype=author&query=Martin%2C+A">Alexander Martin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+E">Eugene Yang</a>, <a href="/search/cs?searchtype=author&query=Yates%2C+A">Andrew Yates</a>, <a href="/search/cs?searchtype=author&query=Sanders%2C+K">Kate Sanders</a>, <a href="/search/cs?searchtype=author&query=Murray%2C+K">Kenton Murray</a>, <a href="/search/cs?searchtype=author&query=Kriz%2C+R">Reno Kriz</a>, <a href="/search/cs?searchtype=author&query=de+Melo%2C+C+M">Celso M. de Melo</a>, <a href="/search/cs?searchtype=author&query=Van+Durme%2C+B">Benjamin Van Durme</a>, <a href="/search/cs?searchtype=author&query=Chellappa%2C+R">Rama Chellappa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.19009v1-abstract-short" style="display: inline;"> In this work, we tackle the problem of text-to-video retrieval (T2VR). Inspired by the success of late interaction techniques in text-document, text-image, and text-video retrieval, our approach, Video-ColBERT, introduces a simple and efficient mechanism for fine-grained similarity assessment between queries and videos. Video-ColBERT is built upon 3 main components: a fine-grained spatial and temp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.19009v1-abstract-full').style.display = 'inline'; document.getElementById('2503.19009v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.19009v1-abstract-full" style="display: none;"> In this work, we tackle the problem of text-to-video retrieval (T2VR). Inspired by the success of late interaction techniques in text-document, text-image, and text-video retrieval, our approach, Video-ColBERT, introduces a simple and efficient mechanism for fine-grained similarity assessment between queries and videos. Video-ColBERT is built upon 3 main components: a fine-grained spatial and temporal token-wise interaction, query and visual expansions, and a dual sigmoid loss during training. We find that this interaction and training paradigm leads to strong individual, yet compatible, representations for encoding video content. These representations lead to increases in performance on common text-to-video retrieval benchmarks compared to other bi-encoder methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.19009v1-abstract-full').style.display = 'none'; document.getElementById('2503.19009v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at CVPR 2025. 13 pages, 4 figures. Approved for public release: distribution unlimited</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.13330">arXiv:2503.13330</a> <span> [<a href="https://arxiv.org/pdf/2503.13330">pdf</a>, <a href="https://arxiv.org/format/2503.13330">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LEAVS: An LLM-based Labeler for Abdominal CT Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lanfredi%2C+R+B">Ricardo Bigolin Lanfredi</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+Y">Yan Zhuang</a>, <a href="/search/cs?searchtype=author&query=Finkelstein%2C+M">Mark Finkelstein</a>, <a href="/search/cs?searchtype=author&query=Balamuralikrishna%2C+P+T+S">Praveen Thoppey Srinivasan Balamuralikrishna</a>, <a href="/search/cs?searchtype=author&query=Krembs%2C+L">Luke Krembs</a>, <a href="/search/cs?searchtype=author&query=Khoury%2C+B">Brandon Khoury</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Arthi Reddy</a>, <a href="/search/cs?searchtype=author&query=Mukherjee%2C+P">Pritam Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Rofsky%2C+N+M">Neil M. Rofsky</a>, <a href="/search/cs?searchtype=author&query=Summers%2C+R+M">Ronald M. Summers</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.13330v1-abstract-short" style="display: inline;"> Extracting structured labels from radiology reports has been employed to create vision models to simultaneously detect several types of abnormalities. However, existing works focus mainly on the chest region. Few works have been investigated on abdominal radiology reports due to more complex anatomy and a wider range of pathologies in the abdomen. We propose LEAVS (Large language model Extractor f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.13330v1-abstract-full').style.display = 'inline'; document.getElementById('2503.13330v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.13330v1-abstract-full" style="display: none;"> Extracting structured labels from radiology reports has been employed to create vision models to simultaneously detect several types of abnormalities. However, existing works focus mainly on the chest region. Few works have been investigated on abdominal radiology reports due to more complex anatomy and a wider range of pathologies in the abdomen. We propose LEAVS (Large language model Extractor for Abdominal Vision Supervision). This labeler can annotate the certainty of presence and the urgency of seven types of abnormalities for nine abdominal organs on CT radiology reports. To ensure broad coverage, we chose abnormalities that encompass most of the finding types from CT reports. Our approach employs a specialized chain-of-thought prompting strategy for a locally-run LLM using sentence extraction and multiple-choice questions in a tree-based decision system. We demonstrate that the LLM can extract several abnormality types across abdominal organs with an average F1 score of 0.89, significantly outperforming competing labelers and humans. Additionally, we show that extraction of urgency labels achieved performance comparable to human annotations. Finally, we demonstrate that the abnormality labels contain valuable information for training a single vision model that classifies several organs as normal or abnormal. We release our code and structured annotations for a public CT dataset containing over 1,000 CT volumes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.13330v1-abstract-full').style.display = 'none'; document.getElementById('2503.13330v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05839">arXiv:2501.05839</a> <span> [<a href="https://arxiv.org/pdf/2501.05839">pdf</a>, <a href="https://arxiv.org/format/2501.05839">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Poetry in Pixels: Prompt Tuning for Poem Image Generation via Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jamil%2C+S">Sofia Jamil</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+B+A">Bollampalli Areen Reddy</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Raghvendra Kumar</a>, <a href="/search/cs?searchtype=author&query=Saha%2C+S">Sriparna Saha</a>, <a href="/search/cs?searchtype=author&query=Joseph%2C+K+J">K J Joseph</a>, <a href="/search/cs?searchtype=author&query=Goswami%2C+K">Koustava Goswami</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05839v1-abstract-short" style="display: inline;"> The task of text-to-image generation has encountered significant challenges when applied to literary works, especially poetry. Poems are a distinct form of literature, with meanings that frequently transcend beyond the literal words. To address this shortcoming, we propose a PoemToPixel framework designed to generate images that visually represent the inherent meanings of poems. Our approach incor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05839v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05839v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05839v1-abstract-full" style="display: none;"> The task of text-to-image generation has encountered significant challenges when applied to literary works, especially poetry. Poems are a distinct form of literature, with meanings that frequently transcend beyond the literal words. To address this shortcoming, we propose a PoemToPixel framework designed to generate images that visually represent the inherent meanings of poems. Our approach incorporates the concept of prompt tuning in our image generation framework to ensure that the resulting images closely align with the poetic content. In addition, we propose the PoeKey algorithm, which extracts three key elements in the form of emotions, visual elements, and themes from poems to form instructions which are subsequently provided to a diffusion model for generating corresponding images. Furthermore, to expand the diversity of the poetry dataset across different genres and ages, we introduce MiniPo, a novel multimodal dataset comprising 1001 children's poems and images. Leveraging this dataset alongside PoemSum, we conducted both quantitative and qualitative evaluations of image generation using our PoemToPixel framework. This paper demonstrates the effectiveness of our approach and offers a fresh perspective on generating images from literary sources. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05839v1-abstract-full').style.display = 'none'; document.getElementById('2501.05839v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03486">arXiv:2501.03486</a> <span> [<a href="https://arxiv.org/pdf/2501.03486">pdf</a>, <a href="https://arxiv.org/format/2501.03486">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Align-Pro: A Principled Approach to Prompt Optimization for LLM Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Trivedi%2C+P">Prashant Trivedi</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+S">Souradip Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Avinash Reddy</a>, <a href="/search/cs?searchtype=author&query=Aggarwal%2C+V">Vaneet Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Bedi%2C+A+S">Amrit Singh Bedi</a>, <a href="/search/cs?searchtype=author&query=Atia%2C+G+K">George K. Atia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03486v1-abstract-short" style="display: inline;"> The alignment of large language models (LLMs) with human values is critical as these models become increasingly integrated into various societal and decision-making processes. Traditional methods, such as reinforcement learning from human feedback (RLHF), achieve alignment by fine-tuning model parameters, but these approaches are often computationally expensive and impractical when models are froz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03486v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03486v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03486v1-abstract-full" style="display: none;"> The alignment of large language models (LLMs) with human values is critical as these models become increasingly integrated into various societal and decision-making processes. Traditional methods, such as reinforcement learning from human feedback (RLHF), achieve alignment by fine-tuning model parameters, but these approaches are often computationally expensive and impractical when models are frozen or inaccessible for parameter modification. In contrast, prompt optimization is a viable alternative to RLHF for LLM alignment. While the existing literature has shown empirical promise of prompt optimization, its theoretical underpinning remains under-explored. We address this gap by formulating prompt optimization as an optimization problem and try to provide theoretical insights into the optimality of such a framework. To analyze the performance of the prompt optimization, we study theoretical suboptimality bounds and provide insights in terms of how prompt optimization depends upon the given prompter and target model. We also provide empirical validation through experiments on various datasets, demonstrating that prompt optimization can effectively align LLMs, even when parameter fine-tuning is not feasible. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03486v1-abstract-full').style.display = 'none'; document.getElementById('2501.03486v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, Accepted in AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08648">arXiv:2412.08648</a> <span> [<a href="https://arxiv.org/pdf/2412.08648">pdf</a>, <a href="https://arxiv.org/format/2412.08648">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Detecting Visual Triggers in Cannabis Imagery: A CLIP-Based Multi-Labeling Framework with Local-Global Aggregation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+L">Linqi Lu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xianshi Yu</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+P">Akhil Perumal Reddy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.08648v1-abstract-short" style="display: inline;"> This study investigates the interplay of visual and textual features in online discussions about cannabis edibles and their impact on user engagement. Leveraging the CLIP model, we analyzed 42,743 images from Facebook (March 1 to August 31, 2021), with a focus on detecting food-related visuals and examining the influence of image attributes such as colorfulness and brightness on user interaction.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08648v1-abstract-full').style.display = 'inline'; document.getElementById('2412.08648v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.08648v1-abstract-full" style="display: none;"> This study investigates the interplay of visual and textual features in online discussions about cannabis edibles and their impact on user engagement. Leveraging the CLIP model, we analyzed 42,743 images from Facebook (March 1 to August 31, 2021), with a focus on detecting food-related visuals and examining the influence of image attributes such as colorfulness and brightness on user interaction. For textual analysis, we utilized the BART model as a denoising autoencoder to classify ten topics derived from structural topic modeling, exploring their relationship with user engagement. Linear regression analysis identified significant positive correlations between food-related visuals (e.g., fruit, candy, and bakery) and user engagement scores, as well as between engagement and text topics such as cannabis legalization. In contrast, negative associations were observed with image colorfulness and certain textual themes. These findings offer actionable insights for policymakers and regulatory bodies in designing warning labels and marketing regulations to address potential risks associated with recreational cannabis edibles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08648v1-abstract-full').style.display = 'none'; document.getElementById('2412.08648v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This project was initiated in September 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10132">arXiv:2411.10132</a> <span> [<a href="https://arxiv.org/pdf/2411.10132">pdf</a>, <a href="https://arxiv.org/format/2411.10132">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Omnichain Web: The Universal Framework for Streamlined Chain Abstraction and Cross-Layer Interaction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gajera%2C+H">Hardik Gajera</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Akhil Reddy</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+B">Bhagath Reddy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10132v2-abstract-short" style="display: inline;"> The Web3 ecosystem is highly fragmented, making seamless integration difficult for over a billion Web2 businesses, enterprises, and AI protocols. As blockchains, rollups, and app-specific chains expand, cross-chain interactions remain inefficient, and liquidity is deeply fragmented. AI systems lack standardized blockchain access, limiting autonomous functionality. Intent-based interactions, crucia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10132v2-abstract-full').style.display = 'inline'; document.getElementById('2411.10132v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10132v2-abstract-full" style="display: none;"> The Web3 ecosystem is highly fragmented, making seamless integration difficult for over a billion Web2 businesses, enterprises, and AI protocols. As blockchains, rollups, and app-specific chains expand, cross-chain interactions remain inefficient, and liquidity is deeply fragmented. AI systems lack standardized blockchain access, limiting autonomous functionality. Intent-based interactions, crucial for AI-driven automation, face scalability issues due to the absence of robust execution platforms. Meanwhile, the current solver ecosystem is centralized, as liquidity rebalancing remains a challenge due to a lack of developer-friendly tools. Dojima's Omnichain Web introduces a universal framework that abstracts blockchain complexity, bridging Web2, Web3, and AI. At its core, OmniRollups facilitate scalable execution across chains, while the Omni Sequencer ensures atomic, secure intent processing. Linera microchains enable AI-driven transaction automation, seamlessly integrating with Web3 data streams. Ragno Network decentralizes L1 infrastructure, optimizing cross-chain liquidity flows, while the Proof Network enhances cryptographic security for omnichain transactions. Finally, the Builder Marketplace introduces a solver-driven execution layer, allowing developers to build and monetize intent-based applications without liquidity constraints. By fostering a composable marketplace at the intersection of Web2 and Web3, Omnichain Web enables the seamless flow of data, value, and computation. This framework mirrors the internet, bridging Web3 decentralization with Web2 scale to drive the next wave of adoption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10132v2-abstract-full').style.display = 'none'; document.getElementById('2411.10132v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05359">arXiv:2411.05359</a> <span> [<a href="https://arxiv.org/pdf/2411.05359">pdf</a>, <a href="https://arxiv.org/format/2411.05359">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Agricultural Landscape Understanding At Country-Scale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dua%2C+R">Radhika Dua</a>, <a href="/search/cs?searchtype=author&query=Saxena%2C+N">Nikita Saxena</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Aditi Agarwal</a>, <a href="/search/cs?searchtype=author&query=Wilson%2C+A">Alex Wilson</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+G">Gaurav Singh</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+H">Hoang Tran</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+I">Ishan Deshpande</a>, <a href="/search/cs?searchtype=author&query=Kaur%2C+A">Amandeep Kaur</a>, <a href="/search/cs?searchtype=author&query=Aggarwal%2C+G">Gaurav Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Nath%2C+C">Chandan Nath</a>, <a href="/search/cs?searchtype=author&query=Basu%2C+A">Arnab Basu</a>, <a href="/search/cs?searchtype=author&query=Batchu%2C+V">Vishal Batchu</a>, <a href="/search/cs?searchtype=author&query=Holla%2C+S">Sharath Holla</a>, <a href="/search/cs?searchtype=author&query=Kurle%2C+B">Bindiya Kurle</a>, <a href="/search/cs?searchtype=author&query=Missura%2C+O">Olana Missura</a>, <a href="/search/cs?searchtype=author&query=Aggarwal%2C+R">Rahul Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Shubhika Garg</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+N">Nishi Shah</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Avneet Singh</a>, <a href="/search/cs?searchtype=author&query=Tewari%2C+D">Dinesh Tewari</a>, <a href="/search/cs?searchtype=author&query=Dondzik%2C+A">Agata Dondzik</a>, <a href="/search/cs?searchtype=author&query=Adsul%2C+B">Bharat Adsul</a>, <a href="/search/cs?searchtype=author&query=Sohoni%2C+M">Milind Sohoni</a>, <a href="/search/cs?searchtype=author&query=Praveen%2C+A+R">Asim Rama Praveen</a>, <a href="/search/cs?searchtype=author&query=Dangi%2C+A">Aaryan Dangi</a> , et al. (10 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05359v1-abstract-short" style="display: inline;"> Agricultural landscapes are quite complex, especially in the Global South where fields are smaller, and agricultural practices are more varied. In this paper we report on our progress in digitizing the agricultural landscape (natural and man-made) in our study region of India. We use high resolution imagery and a UNet style segmentation model to generate the first of its kind national-scale multi-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05359v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05359v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05359v1-abstract-full" style="display: none;"> Agricultural landscapes are quite complex, especially in the Global South where fields are smaller, and agricultural practices are more varied. In this paper we report on our progress in digitizing the agricultural landscape (natural and man-made) in our study region of India. We use high resolution imagery and a UNet style segmentation model to generate the first of its kind national-scale multi-class panoptic segmentation output. Through this work we have been able to identify individual fields across 151.7M hectares, and delineating key features such as water resources and vegetation. We share how this output was validated by our team and externally by downstream users, including some sample use cases that can lead to targeted data driven decision making. We believe this dataset will contribute towards digitizing agriculture by generating the foundational baselayer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05359v1-abstract-full').style.display = 'none'; document.getElementById('2411.05359v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">34 pages, 7 tables, 15 figs</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02152">arXiv:2410.02152</a> <span> [<a href="https://arxiv.org/pdf/2410.02152">pdf</a>, <a href="https://arxiv.org/format/2410.02152">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1117/12.3013530">10.1117/12.3013530 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An Evaluation of Large Pre-Trained Models for Gesture Recognition using Synthetic Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Arun Reddy</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+K">Ketul Shah</a>, <a href="/search/cs?searchtype=author&query=Rivera%2C+C">Corban Rivera</a>, <a href="/search/cs?searchtype=author&query=Paul%2C+W">William Paul</a>, <a href="/search/cs?searchtype=author&query=De+Melo%2C+C+M">Celso M. De Melo</a>, <a href="/search/cs?searchtype=author&query=Chellappa%2C+R">Rama Chellappa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02152v1-abstract-short" style="display: inline;"> In this work, we explore the possibility of using synthetically generated data for video-based gesture recognition with large pre-trained models. We consider whether these models have sufficiently robust and expressive representation spaces to enable "training-free" classification. Specifically, we utilize various state-of-the-art video encoders to extract features for use in k-nearest neighbors c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02152v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02152v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02152v1-abstract-full" style="display: none;"> In this work, we explore the possibility of using synthetically generated data for video-based gesture recognition with large pre-trained models. We consider whether these models have sufficiently robust and expressive representation spaces to enable "training-free" classification. Specifically, we utilize various state-of-the-art video encoders to extract features for use in k-nearest neighbors classification, where the training data points are derived from synthetic videos only. We compare these results with another training-free approach -- zero-shot classification using text descriptions of each gesture. In our experiments with the RoCoG-v2 dataset, we find that using synthetic training videos yields significantly lower classification accuracy on real test videos compared to using a relatively small number of real training videos. We also observe that video backbones that were fine-tuned on classification tasks serve as superior feature extractors, and that the choice of fine-tuning data has a substantial impact on k-nearest neighbors performance. Lastly, we find that zero-shot text-based classification performs poorly on the gesture recognition task, as gestures are not easily described through natural language. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02152v1-abstract-full').style.display = 'none'; document.getElementById('2410.02152v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Synthetic Data for Artificial Intelligence and Machine Learning: Tools, Techniques, and Applications II (SPIE Defense + Commercial Sensing, 2024)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Synthetic Data for Artificial Intelligence and Machine Learning: Tools, Techniques, and Applications II. Vol. 13035. SPIE, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18239">arXiv:2409.18239</a> <span> [<a href="https://arxiv.org/pdf/2409.18239">pdf</a>, <a href="https://arxiv.org/format/2409.18239">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Towards Sub-millisecond Latency Real-Time Speech Enhancement Models on Hearables </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dementyev%2C+A">Artem Dementyev</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+C+K+A">Chandan K. A. Reddy</a>, <a href="/search/cs?searchtype=author&query=Wisdom%2C+S">Scott Wisdom</a>, <a href="/search/cs?searchtype=author&query=Chatlani%2C+N">Navin Chatlani</a>, <a href="/search/cs?searchtype=author&query=Hershey%2C+J+R">John R. Hershey</a>, <a href="/search/cs?searchtype=author&query=Lyon%2C+R+F">Richard F. Lyon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18239v2-abstract-short" style="display: inline;"> Low latency models are critical for real-time speech enhancement applications, such as hearing aids and hearables. However, the sub-millisecond latency space for resource-constrained hearables remains underexplored. We demonstrate speech enhancement using a computationally efficient minimum-phase FIR filter, enabling sample-by-sample processing to achieve mean algorithmic latency of 0.32 ms to 1.2… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18239v2-abstract-full').style.display = 'inline'; document.getElementById('2409.18239v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18239v2-abstract-full" style="display: none;"> Low latency models are critical for real-time speech enhancement applications, such as hearing aids and hearables. However, the sub-millisecond latency space for resource-constrained hearables remains underexplored. We demonstrate speech enhancement using a computationally efficient minimum-phase FIR filter, enabling sample-by-sample processing to achieve mean algorithmic latency of 0.32 ms to 1.25 ms. With a single microphone, we observe a mean SI-SDRi of 4.1 dB. The approach shows generalization with a DNSMOS increase of 0.2 on unseen audio recordings. We use a lightweight LSTM-based model of 626k parameters to generate FIR taps. Using a real hardware implementation on a low-power DSP, our system can run with 376 MIPS and a mean end-to-end latency of 3.35 ms. In addition, we provide a comparison with existing low-latency spectral masking techniques. We hope this work will enable a better understanding of latency and can be used to improve the comfort and usability of hearables. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18239v2-abstract-full').style.display = 'none'; document.getElementById('2409.18239v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17840">arXiv:2409.17840</a> <span> [<a href="https://arxiv.org/pdf/2409.17840">pdf</a>, <a href="https://arxiv.org/format/2409.17840">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Detecting and Measuring Confounding Using Causal Mechanism Shifts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17840v1-abstract-short" style="display: inline;"> Detecting and measuring confounding effects from data is a key challenge in causal inference. Existing methods frequently assume causal sufficiency, disregarding the presence of unobserved confounding variables. Causal sufficiency is both unrealistic and empirically untestable. Additionally, existing methods make strong parametric assumptions about the underlying causal generative process to guara… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17840v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17840v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17840v1-abstract-full" style="display: none;"> Detecting and measuring confounding effects from data is a key challenge in causal inference. Existing methods frequently assume causal sufficiency, disregarding the presence of unobserved confounding variables. Causal sufficiency is both unrealistic and empirically untestable. Additionally, existing methods make strong parametric assumptions about the underlying causal generative process to guarantee the identifiability of confounding variables. Relaxing the causal sufficiency and parametric assumptions and leveraging recent advancements in causal discovery and confounding analysis with non-i.i.d. data, we propose a comprehensive approach for detecting and measuring confounding. We consider various definitions of confounding and introduce tailored methodologies to achieve three objectives: (i) detecting and measuring confounding among a set of variables, (ii) separating observed and unobserved confounding effects, and (iii) understanding the relative strengths of confounding bias between different sets of variables. We present useful properties of a confounding measure and present measures that satisfy those properties. Empirical results support the theoretical analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17840v1-abstract-full').style.display = 'none'; document.getElementById('2409.17840v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07612">arXiv:2407.07612</a> <span> [<a href="https://arxiv.org/pdf/2407.07612">pdf</a>, <a href="https://arxiv.org/format/2407.07612">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Teaching Transformers Causal Reasoning through Axiomatic Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vashishtha%2C+A">Aniket Vashishtha</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Abhinav Kumar</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07612v1-abstract-short" style="display: inline;"> For text-based AI systems to interact in the real world, causal reasoning is an essential skill. Since interventional data is costly to generate, we study to what extent an agent can learn causal reasoning from passive data. Specifically, we consider an axiomatic training setup where an agent learns from multiple demonstrations of a causal axiom (or rule), rather than incorporating the axiom as an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07612v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07612v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07612v1-abstract-full" style="display: none;"> For text-based AI systems to interact in the real world, causal reasoning is an essential skill. Since interventional data is costly to generate, we study to what extent an agent can learn causal reasoning from passive data. Specifically, we consider an axiomatic training setup where an agent learns from multiple demonstrations of a causal axiom (or rule), rather than incorporating the axiom as an inductive bias or inferring it from data values. A key question is whether the agent would learn to generalize from the axiom demonstrations to new scenarios. For example, if a transformer model is trained on demonstrations of the causal transitivity axiom over small graphs, would it generalize to applying the transitivity axiom over large graphs? Our results, based on a novel axiomatic training scheme, indicate that such generalization is possible. We consider the task of inferring whether a variable causes another variable, given a causal graph structure. We find that a 67 million parameter transformer model, when trained on linear causal chains (along with some noisy variations) can generalize well to new kinds of graphs, including longer causal chains, causal chains with reversed order, and graphs with branching; even when it is not explicitly trained for such settings. Our model performs at par (or even better) than many larger language models such as GPT-4, Gemini Pro, and Phi-3. Overall, our axiomatic training framework provides a new paradigm of learning causal reasoning from passive data that can be used to learn arbitrary axioms, as long as sufficient demonstrations can be generated. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07612v1-abstract-full').style.display = 'none'; document.getElementById('2407.07612v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18547">arXiv:2405.18547</a> <span> [<a href="https://arxiv.org/pdf/2405.18547">pdf</a>, <a href="https://arxiv.org/ps/2405.18547">ps</a>, <a href="https://arxiv.org/format/2405.18547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> User Perception of CAPTCHAs: A Comparative Study between University and Internet Users </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Arun Reddy</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yuan Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18547v1-abstract-short" style="display: inline;"> CAPTCHAs are commonly used to distinguish between human and bot users on the web. However, despite having various types of CAPTCHAs, there are still concerns about their security and usability. To address these concerns, we surveyed over 250 participants from a university campus and Amazon Mechanical Turk. Our goal was to gather user perceptions regarding the security and usability of current CAPT… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18547v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18547v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18547v1-abstract-full" style="display: none;"> CAPTCHAs are commonly used to distinguish between human and bot users on the web. However, despite having various types of CAPTCHAs, there are still concerns about their security and usability. To address these concerns, we surveyed over 250 participants from a university campus and Amazon Mechanical Turk. Our goal was to gather user perceptions regarding the security and usability of current CAPTCHA implementations. After analyzing the data using statistical and thematic methods, we found that users struggle to navigate current CAPTCHA challenges due to increasing difficulty levels. As a result, they experience frustration, which negatively impacts their user experience. Additionally, participants expressed concerns about the reliability and security of these systems. Our findings can offer valuable insights for creating more secure and user-friendly CAPTCHA technologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18547v1-abstract-full').style.display = 'none'; document.getElementById('2405.18547v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.07921">arXiv:2405.07921</a> <span> [<a href="https://arxiv.org/pdf/2405.07921">pdf</a>, <a href="https://arxiv.org/format/2405.07921">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Can Better Text Semantics in Prompt Tuning Improve VLM Generalization? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kuchibhotla%2C+H+C">Hari Chandana Kuchibhotla</a>, <a href="/search/cs?searchtype=author&query=Kancheti%2C+S+S">Sai Srinivas Kancheti</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.07921v2-abstract-short" style="display: inline;"> Going beyond mere fine-tuning of vision-language models (VLMs), learnable prompt tuning has emerged as a promising, resource-efficient alternative. Despite their potential, effectively learning prompts faces the following challenges: (i) training in a low-shot scenario results in overfitting, limiting adaptability, and yielding weaker performance on newer classes or datasets; (ii) prompt-tuning's… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07921v2-abstract-full').style.display = 'inline'; document.getElementById('2405.07921v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.07921v2-abstract-full" style="display: none;"> Going beyond mere fine-tuning of vision-language models (VLMs), learnable prompt tuning has emerged as a promising, resource-efficient alternative. Despite their potential, effectively learning prompts faces the following challenges: (i) training in a low-shot scenario results in overfitting, limiting adaptability, and yielding weaker performance on newer classes or datasets; (ii) prompt-tuning's efficacy heavily relies on the label space, with decreased performance in large class spaces, signaling potential gaps in bridging image and class concepts. In this work, we investigate whether better text semantics can help address these concerns. In particular, we introduce a prompt-tuning method that leverages class descriptions obtained from Large Language Models (LLMs). These class descriptions are used to bridge image and text modalities. Our approach constructs part-level description-guided image and text features, which are subsequently aligned to learn more generalizable prompts. Our comprehensive experiments conducted across 11 benchmark datasets show that our method outperforms established methods, demonstrating substantial improvements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07921v2-abstract-full').style.display = 'none'; document.getElementById('2405.07921v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15760">arXiv:2404.15760</a> <span> [<a href="https://arxiv.org/pdf/2404.15760">pdf</a>, <a href="https://arxiv.org/format/2404.15760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Debiasing Machine Unlearning with Counterfactual Examples </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Ziheng Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jia Wang</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+J">Jun Zhuang</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Silvestri%2C+F">Fabrizio Silvestri</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jin Huang</a>, <a href="/search/cs?searchtype=author&query=Nag%2C+K">Kaushiki Nag</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+K">Kun Kuang</a>, <a href="/search/cs?searchtype=author&query=Ning%2C+X">Xin Ning</a>, <a href="/search/cs?searchtype=author&query=Tolomei%2C+G">Gabriele Tolomei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15760v1-abstract-short" style="display: inline;"> The right to be forgotten (RTBF) seeks to safeguard individuals from the enduring effects of their historical actions by implementing machine-learning techniques. These techniques facilitate the deletion of previously acquired knowledge without requiring extensive model retraining. However, they often overlook a critical issue: unlearning processes bias. This bias emerges from two main sources: (1… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15760v1-abstract-full').style.display = 'inline'; document.getElementById('2404.15760v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15760v1-abstract-full" style="display: none;"> The right to be forgotten (RTBF) seeks to safeguard individuals from the enduring effects of their historical actions by implementing machine-learning techniques. These techniques facilitate the deletion of previously acquired knowledge without requiring extensive model retraining. However, they often overlook a critical issue: unlearning processes bias. This bias emerges from two main sources: (1) data-level bias, characterized by uneven data removal, and (2) algorithm-level bias, which leads to the contamination of the remaining dataset, thereby degrading model accuracy. In this work, we analyze the causal factors behind the unlearning process and mitigate biases at both data and algorithmic levels. Typically, we introduce an intervention-based approach, where knowledge to forget is erased with a debiased dataset. Besides, we guide the forgetting procedure by leveraging counterfactual examples, as they maintain semantic data consistency without hurting performance on the remaining dataset. Experimental results demonstrate that our method outperforms existing machine unlearning baselines on evaluation metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15760v1-abstract-full').style.display = 'none'; document.getElementById('2404.15760v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07540">arXiv:2403.07540</a> <span> [<a href="https://arxiv.org/pdf/2403.07540">pdf</a>, <a href="https://arxiv.org/format/2403.07540">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> WannaLaugh: A Configurable Ransomware Emulator -- Learning to Mimic Malicious Storage Traces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Diamantopoulos%2C+D">Dionysios Diamantopoulos</a>, <a href="/search/cs?searchtype=author&query=Pletka%2C+R">Roman Pletka</a>, <a href="/search/cs?searchtype=author&query=Sarafijanovic%2C+S">Slavisa Sarafijanovic</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+L+N">A. L. Narasimha Reddy</a>, <a href="/search/cs?searchtype=author&query=Pozidis%2C+H">Haris Pozidis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07540v2-abstract-short" style="display: inline;"> Ransomware, a fearsome and rapidly evolving cybersecurity threat, continues to inflict severe consequences on individuals and organizations worldwide. Traditional detection methods, reliant on static signatures and application behavioral patterns, are challenged by the dynamic nature of these threats. This paper introduces three primary contributions to address this challenge. First, we introduce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07540v2-abstract-full').style.display = 'inline'; document.getElementById('2403.07540v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07540v2-abstract-full" style="display: none;"> Ransomware, a fearsome and rapidly evolving cybersecurity threat, continues to inflict severe consequences on individuals and organizations worldwide. Traditional detection methods, reliant on static signatures and application behavioral patterns, are challenged by the dynamic nature of these threats. This paper introduces three primary contributions to address this challenge. First, we introduce a ransomware emulator. This tool is designed to safely mimic ransomware attacks without causing actual harm or spreading malware, making it a unique solution for studying ransomware behavior. Second, we demonstrate how we use this emulator to create storage I/O traces. These traces are then utilized to train machine-learning models. Our results show that these models are effective in detecting ransomware, highlighting the practical application of our emulator in developing responsible cybersecurity tools. Third, we show how our emulator can be used to mimic the I/O behavior of existing ransomware thereby enabling safe trace collection. Both the emulator and its application represent significant steps forward in ransomware detection in the era of machine-learning-driven cybersecurity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07540v2-abstract-full').style.display = 'none'; document.getElementById('2403.07540v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.02914">arXiv:2312.02914</a> <span> [<a href="https://arxiv.org/pdf/2312.02914">pdf</a>, <a href="https://arxiv.org/format/2312.02914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Arun Reddy</a>, <a href="/search/cs?searchtype=author&query=Paul%2C+W">William Paul</a>, <a href="/search/cs?searchtype=author&query=Rivera%2C+C">Corban Rivera</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+K">Ketul Shah</a>, <a href="/search/cs?searchtype=author&query=de+Melo%2C+C+M">Celso M. de Melo</a>, <a href="/search/cs?searchtype=author&query=Chellappa%2C+R">Rama Chellappa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.02914v5-abstract-short" style="display: inline;"> In this work, we tackle the problem of unsupervised domain adaptation (UDA) for video action recognition. Our approach, which we call UNITE, uses an image teacher model to adapt a video student model to the target domain. UNITE first employs self-supervised pre-training to promote discriminative feature learning on target domain videos using a teacher-guided masked distillation objective. We then… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.02914v5-abstract-full').style.display = 'inline'; document.getElementById('2312.02914v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.02914v5-abstract-full" style="display: none;"> In this work, we tackle the problem of unsupervised domain adaptation (UDA) for video action recognition. Our approach, which we call UNITE, uses an image teacher model to adapt a video student model to the target domain. UNITE first employs self-supervised pre-training to promote discriminative feature learning on target domain videos using a teacher-guided masked distillation objective. We then perform self-training on masked target data, using the video student model and image teacher model together to generate improved pseudolabels for unlabeled target videos. Our self-training process successfully leverages the strengths of both models to achieve strong transfer performance across domains. We evaluate our approach on multiple video domain adaptation benchmarks and observe significant improvements upon previously reported results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.02914v5-abstract-full').style.display = 'none'; document.getElementById('2312.02914v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at CVPR 2024. 13 pages, 4 figures. Approved for public release: distribution unlimited</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15117">arXiv:2310.15117</a> <span> [<a href="https://arxiv.org/pdf/2310.15117">pdf</a>, <a href="https://arxiv.org/format/2310.15117">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Causal Inference Using LLM-Guided Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vashishtha%2C+A">Aniket Vashishtha</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Abhinav Kumar</a>, <a href="/search/cs?searchtype=author&query=Bachu%2C+S">Saketh Bachu</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.15117v1-abstract-short" style="display: inline;"> At the core of causal inference lies the challenge of determining reliable causal graphs solely based on observational data. Since the well-known backdoor criterion depends on the graph, any errors in the graph can propagate downstream to effect inference. In this work, we initially show that complete graph information is not necessary for causal effect inference; the topological order over graph… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15117v1-abstract-full').style.display = 'inline'; document.getElementById('2310.15117v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15117v1-abstract-full" style="display: none;"> At the core of causal inference lies the challenge of determining reliable causal graphs solely based on observational data. Since the well-known backdoor criterion depends on the graph, any errors in the graph can propagate downstream to effect inference. In this work, we initially show that complete graph information is not necessary for causal effect inference; the topological order over graph variables (causal order) alone suffices. Further, given a node pair, causal order is easier to elicit from domain experts compared to graph edges since determining the existence of an edge can depend extensively on other variables. Interestingly, we find that the same principle holds for Large Language Models (LLMs) such as GPT-3.5-turbo and GPT-4, motivating an automated method to obtain causal order (and hence causal effect) with LLMs acting as virtual domain experts. To this end, we employ different prompting strategies and contextual cues to propose a robust technique of obtaining causal order from LLMs. Acknowledging LLMs' limitations, we also study possible techniques to integrate LLMs with established causal discovery algorithms, including constraint-based and score-based methods, to enhance their performance. Extensive experiments demonstrate that our approach significantly improves causal ordering accuracy as compared to discovery algorithms, highlighting the potential of LLMs to enhance causal inference across diverse fields. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15117v1-abstract-full').style.display = 'none'; document.getElementById('2310.15117v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.01030">arXiv:2309.01030</a> <span> [<a href="https://arxiv.org/pdf/2309.01030">pdf</a>, <a href="https://arxiv.org/format/2309.01030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Online Adaptive Mahalanobis Distance Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qin%2C+L">Lianke Qin</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Aravind Reddy</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhao Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.01030v2-abstract-short" style="display: inline;"> Mahalanobis metrics are widely used in machine learning in conjunction with methods like $k$-nearest neighbors, $k$-means clustering, and $k$-medians clustering. Despite their importance, there has not been any prior work on applying sketching techniques to speed up algorithms for Mahalanobis metrics. In this paper, we initiate the study of dimension reduction for Mahalanobis metrics. In particula… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01030v2-abstract-full').style.display = 'inline'; document.getElementById('2309.01030v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.01030v2-abstract-full" style="display: none;"> Mahalanobis metrics are widely used in machine learning in conjunction with methods like $k$-nearest neighbors, $k$-means clustering, and $k$-medians clustering. Despite their importance, there has not been any prior work on applying sketching techniques to speed up algorithms for Mahalanobis metrics. In this paper, we initiate the study of dimension reduction for Mahalanobis metrics. In particular, we provide efficient data structures for solving the Approximate Distance Estimation (ADE) problem for Mahalanobis distances. We first provide a randomized Monte Carlo data structure. Then, we show how we can adapt it to provide our main data structure which can handle sequences of \textit{adaptive} queries and also online updates to both the Mahalanobis metric matrix and the data points, making it amenable to be used in conjunction with prior algorithms for online learning of Mahalanobis metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01030v2-abstract-full').style.display = 'none'; document.getElementById('2309.01030v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE BigData 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.18183">arXiv:2305.18183</a> <span> [<a href="https://arxiv.org/pdf/2305.18183">pdf</a>, <a href="https://arxiv.org/format/2305.18183">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On Counterfactual Data Augmentation Under Confounding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Bachu%2C+S">Saketh Bachu</a>, <a href="/search/cs?searchtype=author&query=Dash%2C+S">Saloni Dash</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+C">Charchit Sharma</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.18183v2-abstract-short" style="display: inline;"> Counterfactual data augmentation has recently emerged as a method to mitigate confounding biases in the training data. These biases, such as spurious correlations, arise due to various observed and unobserved confounding variables in the data generation process. In this paper, we formally analyze how confounding biases impact downstream classifiers and present a causal viewpoint to the solutions b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18183v2-abstract-full').style.display = 'inline'; document.getElementById('2305.18183v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.18183v2-abstract-full" style="display: none;"> Counterfactual data augmentation has recently emerged as a method to mitigate confounding biases in the training data. These biases, such as spurious correlations, arise due to various observed and unobserved confounding variables in the data generation process. In this paper, we formally analyze how confounding biases impact downstream classifiers and present a causal viewpoint to the solutions based on counterfactual data augmentation. We explore how removing confounding biases serves as a means to learn invariant features, ultimately aiding in generalization beyond the observed data distribution. Additionally, we present a straightforward yet powerful algorithm for generating counterfactual images, which effectively mitigates the influence of confounding effects on downstream classifiers. Through experiments on MNIST variants and the CelebA datasets, we demonstrate how our simple augmentation method helps existing state-of-the-art methods achieve good results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18183v2-abstract-full').style.display = 'none'; document.getElementById('2305.18183v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.17300">arXiv:2305.17300</a> <span> [<a href="https://arxiv.org/pdf/2305.17300">pdf</a>, <a href="https://arxiv.org/format/2305.17300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Large Neuroimaging Datasets to Create Connectome-Constrained Approaches for more Robust, Efficient, and Adaptable Artificial Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Johnson%2C+E+C">Erik C. Johnson</a>, <a href="/search/cs?searchtype=author&query=Robinson%2C+B+S">Brian S. Robinson</a>, <a href="/search/cs?searchtype=author&query=Vallabha%2C+G+K">Gautam K. Vallabha</a>, <a href="/search/cs?searchtype=author&query=Joyce%2C+J">Justin Joyce</a>, <a href="/search/cs?searchtype=author&query=Matelsky%2C+J+K">Jordan K. Matelsky</a>, <a href="/search/cs?searchtype=author&query=Norman-Tenazas%2C+R">Raphael Norman-Tenazas</a>, <a href="/search/cs?searchtype=author&query=Western%2C+I">Isaac Western</a>, <a href="/search/cs?searchtype=author&query=Villafa%C3%B1e-Delgado%2C+M">Marisel Villafa帽e-Delgado</a>, <a href="/search/cs?searchtype=author&query=Cervantes%2C+M">Martha Cervantes</a>, <a href="/search/cs?searchtype=author&query=Robinette%2C+M+S">Michael S. Robinette</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+V">Arun V. Reddy</a>, <a href="/search/cs?searchtype=author&query=Kitchell%2C+L">Lindsey Kitchell</a>, <a href="/search/cs?searchtype=author&query=Rivlin%2C+P+K">Patricia K. Rivlin</a>, <a href="/search/cs?searchtype=author&query=Reilly%2C+E+P">Elizabeth P. Reilly</a>, <a href="/search/cs?searchtype=author&query=Drenkow%2C+N">Nathan Drenkow</a>, <a href="/search/cs?searchtype=author&query=Roos%2C+M+J">Matthew J. Roos</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+I">I-Jeng Wang</a>, <a href="/search/cs?searchtype=author&query=Wester%2C+B+A">Brock A. Wester</a>, <a href="/search/cs?searchtype=author&query=Gray-Roncal%2C+W+R">William R. Gray-Roncal</a>, <a href="/search/cs?searchtype=author&query=Hoffmann%2C+J+A">Joan A. Hoffmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.17300v1-abstract-short" style="display: inline;"> Despite the progress in deep learning networks, efficient learning at the edge (enabling adaptable, low-complexity machine learning solutions) remains a critical need for defense and commercial applications. We envision a pipeline to utilize large neuroimaging datasets, including maps of the brain which capture neuron and synapse connectivity, to improve machine learning approaches. We have pursue… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17300v1-abstract-full').style.display = 'inline'; document.getElementById('2305.17300v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.17300v1-abstract-full" style="display: none;"> Despite the progress in deep learning networks, efficient learning at the edge (enabling adaptable, low-complexity machine learning solutions) remains a critical need for defense and commercial applications. We envision a pipeline to utilize large neuroimaging datasets, including maps of the brain which capture neuron and synapse connectivity, to improve machine learning approaches. We have pursued different approaches within this pipeline structure. First, as a demonstration of data-driven discovery, the team has developed a technique for discovery of repeated subcircuits, or motifs. These were incorporated into a neural architecture search approach to evolve network architectures. Second, we have conducted analysis of the heading direction circuit in the fruit fly, which performs fusion of visual and angular velocity features, to explore augmenting existing computational models with new insight. Our team discovered a novel pattern of connectivity, implemented a new model, and demonstrated sensor fusion on a robotic platform. Third, the team analyzed circuitry for memory formation in the fruit fly connectome, enabling the design of a novel generative replay approach. Finally, the team has begun analysis of connectivity in mammalian cortex to explore potential improvements to transformer networks. These constraints increased network robustness on the most challenging examples in the CIFAR-10-C computer vision robustness benchmark task, while reducing learnable attention parameters by over an order of magnitude. Taken together, these results demonstrate multiple potential approaches to utilize insight from neural systems for developing robust and efficient machine learning techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17300v1-abstract-full').style.display = 'none'; document.getElementById('2305.17300v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.06934">arXiv:2304.06934</a> <span> [<a href="https://arxiv.org/pdf/2304.06934">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Classification of social media Toxic comments using Machine learning models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Poojitha%2C+K">K. Poojitha</a>, <a href="/search/cs?searchtype=author&query=Charish%2C+A+S">A. Sai Charish</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+M+A+K">M. Arun Kuamr Reddy</a>, <a href="/search/cs?searchtype=author&query=Ayyasamy%2C+S">S. Ayyasamy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.06934v1-abstract-short" style="display: inline;"> The abstract outlines the problem of toxic comments on social media platforms, where individuals use disrespectful, abusive, and unreasonable language that can drive users away from discussions. This behavior is referred to as anti-social behavior, which occurs during online debates, comments, and fights. The comments containing explicit language can be classified into various categories, such as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06934v1-abstract-full').style.display = 'inline'; document.getElementById('2304.06934v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.06934v1-abstract-full" style="display: none;"> The abstract outlines the problem of toxic comments on social media platforms, where individuals use disrespectful, abusive, and unreasonable language that can drive users away from discussions. This behavior is referred to as anti-social behavior, which occurs during online debates, comments, and fights. The comments containing explicit language can be classified into various categories, such as toxic, severe toxic, obscene, threat, insult, and identity hate. This behavior leads to online harassment and cyberbullying, which forces individuals to stop expressing their opinions and ideas. To protect users from offensive language, companies have started flagging comments and blocking users. The abstract proposes to create a classifier using an Lstm-cnn model that can differentiate between toxic and non-toxic comments with high accuracy. The classifier can help organizations examine the toxicity of the comment section better. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.06934v1-abstract-full').style.display = 'none'; document.getElementById('2304.06934v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.13850">arXiv:2303.13850</a> <span> [<a href="https://arxiv.org/pdf/2303.13850">pdf</a>, <a href="https://arxiv.org/format/2303.13850">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Towards Learning and Explaining Indirect Causal Effects in Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Bachu%2C+S">Saketh Bachu</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+H">Harsharaj Pathak</a>, <a href="/search/cs?searchtype=author&query=Godfrey%2C+B+L">Benin L Godfrey</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N. Balasubramanian</a>, <a href="/search/cs?searchtype=author&query=V%2C+V">Varshaneya V</a>, <a href="/search/cs?searchtype=author&query=Kar%2C+S+N">Satya Narayanan Kar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.13850v3-abstract-short" style="display: inline;"> Recently, there has been a growing interest in learning and explaining causal effects within Neural Network (NN) models. By virtue of NN architectures, previous approaches consider only direct and total causal effects assuming independence among input variables. We view an NN as a structural causal model (SCM) and extend our focus to include indirect causal effects by introducing feedforward conne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13850v3-abstract-full').style.display = 'inline'; document.getElementById('2303.13850v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.13850v3-abstract-full" style="display: none;"> Recently, there has been a growing interest in learning and explaining causal effects within Neural Network (NN) models. By virtue of NN architectures, previous approaches consider only direct and total causal effects assuming independence among input variables. We view an NN as a structural causal model (SCM) and extend our focus to include indirect causal effects by introducing feedforward connections among input neurons. We propose an ante-hoc method that captures and maintains direct, indirect, and total causal effects during NN model training. We also propose an algorithm for quantifying learned causal effects in an NN model and efficient approximation strategies for quantifying causal effects in high-dimensional data. Extensive experiments conducted on synthetic and real-world datasets demonstrate that the causal effects learned by our ante-hoc method better approximate the ground truth effects compared to existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13850v3-abstract-full').style.display = 'none'; document.getElementById('2303.13850v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.10280">arXiv:2303.10280</a> <span> [<a href="https://arxiv.org/pdf/2303.10280">pdf</a>, <a href="https://arxiv.org/format/2303.10280">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Synthetic-to-Real Domain Adaptation for Action Recognition: A Dataset and Baseline Performances </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+V">Arun V. Reddy</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+K">Ketul Shah</a>, <a href="/search/cs?searchtype=author&query=Paul%2C+W">William Paul</a>, <a href="/search/cs?searchtype=author&query=Mocharla%2C+R">Rohita Mocharla</a>, <a href="/search/cs?searchtype=author&query=Hoffman%2C+J">Judy Hoffman</a>, <a href="/search/cs?searchtype=author&query=Katyal%2C+K+D">Kapil D. Katyal</a>, <a href="/search/cs?searchtype=author&query=Manocha%2C+D">Dinesh Manocha</a>, <a href="/search/cs?searchtype=author&query=de+Melo%2C+C+M">Celso M. de Melo</a>, <a href="/search/cs?searchtype=author&query=Chellappa%2C+R">Rama Chellappa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.10280v2-abstract-short" style="display: inline;"> Human action recognition is a challenging problem, particularly when there is high variability in factors such as subject appearance, backgrounds and viewpoint. While deep neural networks (DNNs) have been shown to perform well on action recognition tasks, they typically require large amounts of high-quality labeled data to achieve robust performance across a variety of conditions. Synthetic data h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.10280v2-abstract-full').style.display = 'inline'; document.getElementById('2303.10280v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.10280v2-abstract-full" style="display: none;"> Human action recognition is a challenging problem, particularly when there is high variability in factors such as subject appearance, backgrounds and viewpoint. While deep neural networks (DNNs) have been shown to perform well on action recognition tasks, they typically require large amounts of high-quality labeled data to achieve robust performance across a variety of conditions. Synthetic data has shown promise as a way to avoid the substantial costs and potential ethical concerns associated with collecting and labeling enormous amounts of data in the real-world. However, synthetic data may differ from real data in important ways. This phenomenon, known as \textit{domain shift}, can limit the utility of synthetic data in robotics applications. To mitigate the effects of domain shift, substantial effort is being dedicated to the development of domain adaptation (DA) techniques. Yet, much remains to be understood about how best to develop these techniques. In this paper, we introduce a new dataset called Robot Control Gestures (RoCoG-v2). The dataset is composed of both real and synthetic videos from seven gesture classes, and is intended to support the study of synthetic-to-real domain shift for video-based action recognition. Our work expands upon existing datasets by focusing the action classes on gestures for human-robot teaming, as well as by enabling investigation of domain shift in both ground and aerial views. We present baseline results using state-of-the-art action recognition and domain adaptation algorithms and offer initial insight on tackling the synthetic-to-real and ground-to-air domain shifts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.10280v2-abstract-full').style.display = 'none'; document.getElementById('2303.10280v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICRA 2023. The first two authors contributed equally. Dataset available at: https://github.com/reddyav1/RoCoG-v2</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08162">arXiv:2303.08162</a> <span> [<a href="https://arxiv.org/pdf/2303.08162">pdf</a>, <a href="https://arxiv.org/format/2303.08162">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Strongly Correlated Electrons">cond-mat.str-el</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mesoscale and Nanoscale Physics">cond-mat.mes-hall</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Artificial intelligence for artificial materials: moir茅 atom </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+D">Di Luo</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+P">Aidan P. Reddy</a>, <a href="/search/cs?searchtype=author&query=Devakul%2C+T">Trithep Devakul</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+L">Liang Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08162v2-abstract-short" style="display: inline;"> Moir茅 engineering in atomically thin van der Waals heterostructures creates artificial quantum materials with designer properties. We solve the many-body problem of interacting electrons confined to a moir茅 superlattice potential minimum (the moir茅 atom) using a 2D fermionic neural network. We show that strong Coulomb interactions in combination with the anisotropic moir茅 potential lead to strikin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08162v2-abstract-full').style.display = 'inline'; document.getElementById('2303.08162v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08162v2-abstract-full" style="display: none;"> Moir茅 engineering in atomically thin van der Waals heterostructures creates artificial quantum materials with designer properties. We solve the many-body problem of interacting electrons confined to a moir茅 superlattice potential minimum (the moir茅 atom) using a 2D fermionic neural network. We show that strong Coulomb interactions in combination with the anisotropic moir茅 potential lead to striking ``Wigner molecule" charge density distributions observable with scanning tunneling microscopy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08162v2-abstract-full').style.display = 'none'; document.getElementById('2303.08162v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> MIT-CTP/5534 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.11408">arXiv:2212.11408</a> <span> [<a href="https://arxiv.org/pdf/2212.11408">pdf</a>, <a href="https://arxiv.org/ps/2212.11408">ps</a>, <a href="https://arxiv.org/format/2212.11408">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adaptive and Dynamic Multi-Resolution Hashing for Pairwise Summations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qin%2C+L">Lianke Qin</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Aravind Reddy</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhao Song</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhaozhuo Xu</a>, <a href="/search/cs?searchtype=author&query=Zhuo%2C+D">Danyang Zhuo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.11408v1-abstract-short" style="display: inline;"> In this paper, we propose Adam-Hash: an adaptive and dynamic multi-resolution hashing data-structure for fast pairwise summation estimation. Given a data-set $X \subset \mathbb{R}^d$, a binary function $f:\mathbb{R}^d\times \mathbb{R}^d\to \mathbb{R}$, and a point $y \in \mathbb{R}^d$, the Pairwise Summation Estimate $\mathrm{PSE}_X(y) := \frac{1}{|X|} \sum_{x \in X} f(x,y)$. For any given data-se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.11408v1-abstract-full').style.display = 'inline'; document.getElementById('2212.11408v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.11408v1-abstract-full" style="display: none;"> In this paper, we propose Adam-Hash: an adaptive and dynamic multi-resolution hashing data-structure for fast pairwise summation estimation. Given a data-set $X \subset \mathbb{R}^d$, a binary function $f:\mathbb{R}^d\times \mathbb{R}^d\to \mathbb{R}$, and a point $y \in \mathbb{R}^d$, the Pairwise Summation Estimate $\mathrm{PSE}_X(y) := \frac{1}{|X|} \sum_{x \in X} f(x,y)$. For any given data-set $X$, we need to design a data-structure such that given any query point $y \in \mathbb{R}^d$, the data-structure approximately estimates $\mathrm{PSE}_X(y)$ in time that is sub-linear in $|X|$. Prior works on this problem have focused exclusively on the case where the data-set is static, and the queries are independent. In this paper, we design a hashing-based PSE data-structure which works for the more practical \textit{dynamic} setting in which insertions, deletions, and replacements of points are allowed. Moreover, our proposed Adam-Hash is also robust to adaptive PSE queries, where an adversary can choose query $q_j \in \mathbb{R}^d$ depending on the output from previous queries $q_1, q_2, \dots, q_{j-1}$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.11408v1-abstract-full').style.display = 'none'; document.getElementById('2212.11408v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BigData 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.15494">arXiv:2211.15494</a> <span> [<a href="https://arxiv.org/pdf/2211.15494">pdf</a>, <a href="https://arxiv.org/format/2211.15494">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Automated Routing of Droplets for DNA Storage on a Digital Microfluidics Platform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Manicka%2C+A">Ajay Manicka</a>, <a href="/search/cs?searchtype=author&query=Stephan%2C+A">Andrew Stephan</a>, <a href="/search/cs?searchtype=author&query=Chari%2C+S">Sriram Chari</a>, <a href="/search/cs?searchtype=author&query=Mendonsa%2C+G">Gemma Mendonsa</a>, <a href="/search/cs?searchtype=author&query=Okubo%2C+P">Peyton Okubo</a>, <a href="/search/cs?searchtype=author&query=Stolzberg-Schray%2C+J">John Stolzberg-Schray</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Anil Reddy</a>, <a href="/search/cs?searchtype=author&query=Riedel%2C+M">Marc Riedel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.15494v5-abstract-short" style="display: inline;"> Technologies for sequencing (reading) and synthesizing (writing) DNA have progressed on a Moore's law-like trajectory over the last three decades. This has motivated the idea of using DNA for data storage. Theoretically, DNA-based storage systems could out-compete all existing forms of archival storage. However, a large gap exists between what is theoretically possible in terms of read and write s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.15494v5-abstract-full').style.display = 'inline'; document.getElementById('2211.15494v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.15494v5-abstract-full" style="display: none;"> Technologies for sequencing (reading) and synthesizing (writing) DNA have progressed on a Moore's law-like trajectory over the last three decades. This has motivated the idea of using DNA for data storage. Theoretically, DNA-based storage systems could out-compete all existing forms of archival storage. However, a large gap exists between what is theoretically possible in terms of read and write speeds and what has been practically demonstrated with DNA. This paper introduces a novel approach to DNA storage, with automated assembly on a digital microfluidic biochip. This technology offers unprecedented parallelism in DNA assembly using a dual library of "symbols" and "linkers". An algorithmic solution is discussed for the problem of managing droplet traffic on the device, with prioritized three-dimensional "A*" routing. An overview is given of the software that was developed for routing a large number of droplets in parallel on the device, minimizing congestion and maximizing throughput. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.15494v5-abstract-full').style.display = 'none'; document.getElementById('2211.15494v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 23 figures. Submitted to the journal "Lab on a Chip" for publication by the Royal Society of Chemistry</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.04370">arXiv:2211.04370</a> <span> [<a href="https://arxiv.org/pdf/2211.04370">pdf</a>, <a href="https://arxiv.org/format/2211.04370">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> NESTER: An Adaptive Neurosymbolic Method for Causal Effect Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.04370v5-abstract-short" style="display: inline;"> Causal effect estimation from observational data is a central problem in causal inference. Methods based on potential outcomes framework solve this problem by exploiting inductive biases and heuristics from causal inference. Each of these methods addresses a specific aspect of causal effect estimation, such as controlling propensity score, enforcing randomization, etc., by designing neural network… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04370v5-abstract-full').style.display = 'inline'; document.getElementById('2211.04370v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.04370v5-abstract-full" style="display: none;"> Causal effect estimation from observational data is a central problem in causal inference. Methods based on potential outcomes framework solve this problem by exploiting inductive biases and heuristics from causal inference. Each of these methods addresses a specific aspect of causal effect estimation, such as controlling propensity score, enforcing randomization, etc., by designing neural network (NN) architectures and regularizers. In this paper, we propose an adaptive method called Neurosymbolic Causal Effect Estimator (NESTER), a generalized method for causal effect estimation. NESTER integrates the ideas used in existing methods based on multi-head NNs for causal effect estimation into one framework. We design a Domain Specific Language (DSL) tailored for causal effect estimation based on causal inductive biases used in literature. We conduct a theoretical analysis to investigate NESTER's efficacy in estimating causal effects. Our comprehensive empirical results show that NESTER performs better than state-of-the-art methods on benchmark datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04370v5-abstract-full').style.display = 'none'; document.getElementById('2211.04370v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12368">arXiv:2210.12368</a> <span> [<a href="https://arxiv.org/pdf/2210.12368">pdf</a>, <a href="https://arxiv.org/format/2210.12368">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Counterfactual Generation Under Confounding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Dash%2C+S">Saloni Dash</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12368v2-abstract-short" style="display: inline;"> A machine learning model, under the influence of observed or unobserved confounders in the training data, can learn spurious correlations and fail to generalize when deployed. For image classifiers, augmenting a training dataset using counterfactual examples has been empirically shown to break spurious correlations. However, the counterfactual generation task itself becomes more difficult as the l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12368v2-abstract-full').style.display = 'inline'; document.getElementById('2210.12368v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12368v2-abstract-full" style="display: none;"> A machine learning model, under the influence of observed or unobserved confounders in the training data, can learn spurious correlations and fail to generalize when deployed. For image classifiers, augmenting a training dataset using counterfactual examples has been empirically shown to break spurious correlations. However, the counterfactual generation task itself becomes more difficult as the level of confounding increases. Existing methods for counterfactual generation under confounding consider a fixed set of interventions (e.g., texture, rotation) and are not flexible enough to capture diverse data-generating processes. Given a causal generative process, we formally characterize the adverse effects of confounding on any downstream tasks and show that the correlation between generative factors (attributes) can be used to quantitatively measure confounding between generative factors. To minimize such correlation, we propose a counterfactual generation method that learns to modify the value of any attribute in an image and generate new images given a set of observed attributes, even when the dataset is highly confounded. These counterfactual images are then used to regularize the downstream classifier such that the learned representations are the same across various generative factors conditioned on the class label. Our method is computationally efficient, simple to implement, and works well for any number of generative factors and confounding variables. Our experimental results on both synthetic (MNIST variants) and real-world (CelebA) datasets show the usefulness of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12368v2-abstract-full').style.display = 'none'; document.getElementById('2210.12368v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.11722">arXiv:2210.11722</a> <span> [<a href="https://arxiv.org/pdf/2210.11722">pdf</a>, <a href="https://arxiv.org/format/2210.11722">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Adaptive re-calibration of channel-wise features for Adversarial Audio Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dongre%2C+V">Vardhan Dongre</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+T">Abhinav Thimma Reddy</a>, <a href="/search/cs?searchtype=author&query=Reddeddy%2C+N">Nikhitha Reddeddy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.11722v1-abstract-short" style="display: inline;"> DeepFake Audio, unlike DeepFake images and videos, has been relatively less explored from detection perspective, and the solutions which exist for the synthetic speech classification either use complex networks or dont generalize to different varieties of synthetic speech obtained using different generative and optimization-based methods. Through this work, we propose a channel-wise recalibration… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11722v1-abstract-full').style.display = 'inline'; document.getElementById('2210.11722v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.11722v1-abstract-full" style="display: none;"> DeepFake Audio, unlike DeepFake images and videos, has been relatively less explored from detection perspective, and the solutions which exist for the synthetic speech classification either use complex networks or dont generalize to different varieties of synthetic speech obtained using different generative and optimization-based methods. Through this work, we propose a channel-wise recalibration of features using attention feature fusion for synthetic speech detection and compare its performance against different detection methods including End2End models and Resnet-based models on synthetic speech generated using Text to Speech and Vocoder systems like WaveNet, WaveRNN, Tactotron, and WaveGlow. We also experiment with Squeeze Excitation (SE) blocks in our Resnet models and found that the combination was able to get better performance. In addition to the analysis, we also demonstrate that the combination of Linear frequency cepstral coefficients (LFCC) and Mel Frequency cepstral coefficients (MFCC) using the attentional feature fusion technique creates better input features representations which can help even simpler models generalize well on synthetic speech classification tasks. Our models (Resnet based using feature fusion) trained on Fake or Real (FoR) dataset and were able to achieve 95% test accuracy with the FoR data, and an average of 90% accuracy with samples we generated using different generative models after adapting this framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11722v1-abstract-full').style.display = 'none'; document.getElementById('2210.11722v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 8 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.03961">arXiv:2210.03961</a> <span> [<a href="https://arxiv.org/pdf/2210.03961">pdf</a>, <a href="https://arxiv.org/ps/2210.03961">ps</a>, <a href="https://arxiv.org/format/2210.03961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Tensor Product Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Aravind Reddy</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhao Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lichen Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.03961v2-abstract-short" style="display: inline;"> In this work, we initiate the study of \emph{Dynamic Tensor Product Regression}. One has matrices $A_1\in \mathbb{R}^{n_1\times d_1},\ldots,A_q\in \mathbb{R}^{n_q\times d_q}$ and a label vector $b\in \mathbb{R}^{n_1\ldots n_q}$, and the goal is to solve the regression problem with the design matrix $A$ being the tensor product of the matrices $A_1, A_2, \dots, A_q$ i.e.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03961v2-abstract-full').style.display = 'inline'; document.getElementById('2210.03961v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.03961v2-abstract-full" style="display: none;"> In this work, we initiate the study of \emph{Dynamic Tensor Product Regression}. One has matrices $A_1\in \mathbb{R}^{n_1\times d_1},\ldots,A_q\in \mathbb{R}^{n_q\times d_q}$ and a label vector $b\in \mathbb{R}^{n_1\ldots n_q}$, and the goal is to solve the regression problem with the design matrix $A$ being the tensor product of the matrices $A_1, A_2, \dots, A_q$ i.e. $\min_{x\in \mathbb{R}^{d_1\ldots d_q}}~\|(A_1\otimes \ldots\otimes A_q)x-b\|_2$. At each time step, one matrix $A_i$ receives a sparse change, and the goal is to maintain a sketch of the tensor product $A_1\otimes\ldots \otimes A_q$ so that the regression solution can be updated quickly. Recomputing the solution from scratch for each round is very slow and so it is important to develop algorithms which can quickly update the solution with the new design matrix. Our main result is a dynamic tree data structure where any update to a single matrix can be propagated quickly throughout the tree. We show that our data structure can be used to solve dynamic versions of not only Tensor Product Regression, but also Tensor Product Spline regression (which is a generalization of ridge regression) and for maintaining Low Rank Approximations for the tensor product. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03961v2-abstract-full').style.display = 'none'; document.getElementById('2210.03961v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.13482">arXiv:2209.13482</a> <span> [<a href="https://arxiv.org/pdf/2209.13482">pdf</a>, <a href="https://arxiv.org/format/2209.13482">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Space Physics">physics.space-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Earth and Planetary Astrophysics">astro-ph.EP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1029/2022JA031183">10.1029/2022JA031183 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Predicting Swarm Equatorial Plasma Bubbles via Machine Learning and Shapley Values </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+S+A">S. A. Reddy</a>, <a href="/search/cs?searchtype=author&query=Forsyth%2C+C">C. Forsyth</a>, <a href="/search/cs?searchtype=author&query=Aruliah%2C+A">A. Aruliah</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+A">A. Smith</a>, <a href="/search/cs?searchtype=author&query=Bortnik%2C+J">J. Bortnik</a>, <a href="/search/cs?searchtype=author&query=Aa%2C+E">E. Aa</a>, <a href="/search/cs?searchtype=author&query=Kataria%2C+D+O">D. O. Kataria</a>, <a href="/search/cs?searchtype=author&query=Lewis%2C+G">G. Lewis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.13482v2-abstract-short" style="display: inline;"> In this study we present AI Prediction of Equatorial Plasma Bubbles (APE), a machine learning model that can accurately predict the Ionospheric Bubble Index (IBI) on the Swarm spacecraft. IBI is a correlation ($R^2$) between perturbations in plasma density and the magnetic field, whose source can be Equatorial Plasma Bubbles (EPBs). EPBs have been studied for a number of years, but their day-to-da… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13482v2-abstract-full').style.display = 'inline'; document.getElementById('2209.13482v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.13482v2-abstract-full" style="display: none;"> In this study we present AI Prediction of Equatorial Plasma Bubbles (APE), a machine learning model that can accurately predict the Ionospheric Bubble Index (IBI) on the Swarm spacecraft. IBI is a correlation ($R^2$) between perturbations in plasma density and the magnetic field, whose source can be Equatorial Plasma Bubbles (EPBs). EPBs have been studied for a number of years, but their day-to-day variability has made predicting them a considerable challenge. We build an ensemble machine learning model to predict IBI. We use data from 2014-22 at a resolution of 1sec, and transform it from a time-series into a 6-dimensional space with a corresponding EPB $R^2$ (0-1) acting as the label. APE performs well across all metrics, exhibiting a skill, association and root mean squared error score of 0.96, 0.98 and 0.08 respectively. The model performs best post-sunset, in the American/Atlantic sector, around the equinoxes, and when solar activity is high. This is promising because EPBs are most likely to occur during these periods. Shapley values reveal that F10.7 is the most important feature in driving the predictions, whereas latitude is the least. The analysis also examines the relationship between the features, which reveals new insights into EPB climatology. Finally, the selection of the features means that APE could be expanded to forecasting EPBs following additional investigations into their onset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13482v2-abstract-full').style.display = 'none'; document.getElementById('2209.13482v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 Pages, 9 Figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Geophysical Research: Space Physics (2023): e2022JA031183 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.06358">arXiv:2209.06358</a> <span> [<a href="https://arxiv.org/pdf/2209.06358">pdf</a>, <a href="https://arxiv.org/format/2209.06358">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Using Rater and System Metadata to Explain Variance in the VoiceMOS Challenge 2022 Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chinen%2C+M">Michael Chinen</a>, <a href="/search/cs?searchtype=author&query=Skoglund%2C+J">Jan Skoglund</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+C+K+A">Chandan K A Reddy</a>, <a href="/search/cs?searchtype=author&query=Ragano%2C+A">Alessandro Ragano</a>, <a href="/search/cs?searchtype=author&query=Hines%2C+A">Andrew Hines</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.06358v1-abstract-short" style="display: inline;"> Non-reference speech quality models are important for a growing number of applications. The VoiceMOS 2022 challenge provided a dataset of synthetic voice conversion and text-to-speech samples with subjective labels. This study looks at the amount of variance that can be explained in subjective ratings of speech quality from metadata and the distribution imbalances of the dataset. Speech quality mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06358v1-abstract-full').style.display = 'inline'; document.getElementById('2209.06358v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.06358v1-abstract-full" style="display: none;"> Non-reference speech quality models are important for a growing number of applications. The VoiceMOS 2022 challenge provided a dataset of synthetic voice conversion and text-to-speech samples with subjective labels. This study looks at the amount of variance that can be explained in subjective ratings of speech quality from metadata and the distribution imbalances of the dataset. Speech quality models were constructed using wav2vec 2.0 with additional metadata features that included rater groups and system identifiers and obtained competitive metrics including a Spearman rank correlation coefficient (SRCC) of 0.934 and MSE of 0.088 at the system-level, and 0.877 and 0.198 at the utterance-level. Using data and metadata that the test restricted or blinded further improved the metrics. A metadata analysis showed that the system-level metrics do not represent the model's system-level prediction as a result of the wide variation in the number of utterances used for each system on the validation and test datasets. We conclude that, in general, conditions should have enough utterances in the test set to bound the sample mean error, and be relatively balanced in utterance count between systems, otherwise the utterance-level metrics may be more reliable and interpretable. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06358v1-abstract-full').style.display = 'none'; document.getElementById('2209.06358v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint; accepted for Interspeech 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.10737">arXiv:2208.10737</a> <span> [<a href="https://arxiv.org/pdf/2208.10737">pdf</a>, <a href="https://arxiv.org/format/2208.10737">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Semi-Automatic Labeling and Semantic Segmentation of Gram-Stained Microscopic Images from DIBaS Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=P.%2C+C+R+G">Chethan Reddy G. P.</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+P+A">Pullagurla Abhijith Reddy</a>, <a href="/search/cs?searchtype=author&query=Kanabur%2C+V+R">Vidyashree R. Kanabur</a>, <a href="/search/cs?searchtype=author&query=Vijayasenan%2C+D">Deepu Vijayasenan</a>, <a href="/search/cs?searchtype=author&query=David%2C+S+S">Sumam S. David</a>, <a href="/search/cs?searchtype=author&query=Govindan%2C+S">Sreejith Govindan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.10737v1-abstract-short" style="display: inline;"> In this paper, a semi-automatic annotation of bacteria genera and species from DIBaS dataset is implemented using clustering and thresholding algorithms. A Deep learning model is trained to achieve the semantic segmentation and classification of the bacteria species. Classification accuracy of 95% is achieved. Deep learning models find tremendous applications in biomedical image processing. Automa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.10737v1-abstract-full').style.display = 'inline'; document.getElementById('2208.10737v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.10737v1-abstract-full" style="display: none;"> In this paper, a semi-automatic annotation of bacteria genera and species from DIBaS dataset is implemented using clustering and thresholding algorithms. A Deep learning model is trained to achieve the semantic segmentation and classification of the bacteria species. Classification accuracy of 95% is achieved. Deep learning models find tremendous applications in biomedical image processing. Automatic segmentation of bacteria from gram-stained microscopic images is essential to diagnose respiratory and urinary tract infections, detect cancers, etc. Deep learning will aid the biologists to get reliable results in less time. Additionally, a lot of human intervention can be reduced. This work can be helpful to detect bacteria from urinary smear images, sputum smear images, etc to diagnose urinary tract infections, tuberculosis, pneumonia, etc. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.10737v1-abstract-full').style.display = 'none'; document.getElementById('2208.10737v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.05750">arXiv:2206.05750</a> <span> [<a href="https://arxiv.org/pdf/2206.05750">pdf</a>, <a href="https://arxiv.org/format/2206.05750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Matching options to tasks using Option-Indexed Hierarchical Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chauhan%2C+K">Kushal Chauhan</a>, <a href="/search/cs?searchtype=author&query=Chatterjee%2C+S">Soumya Chatterjee</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Akash Reddy</a>, <a href="/search/cs?searchtype=author&query=Ravindran%2C+B">Balaraman Ravindran</a>, <a href="/search/cs?searchtype=author&query=Shenoy%2C+P">Pradeep Shenoy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.05750v1-abstract-short" style="display: inline;"> The options framework in Hierarchical Reinforcement Learning breaks down overall goals into a combination of options or simpler tasks and associated policies, allowing for abstraction in the action space. Ideally, these options can be reused across different higher-level goals; indeed, such reuse is necessary to realize the vision of a continual learning agent that can effectively leverage its pri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05750v1-abstract-full').style.display = 'inline'; document.getElementById('2206.05750v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.05750v1-abstract-full" style="display: none;"> The options framework in Hierarchical Reinforcement Learning breaks down overall goals into a combination of options or simpler tasks and associated policies, allowing for abstraction in the action space. Ideally, these options can be reused across different higher-level goals; indeed, such reuse is necessary to realize the vision of a continual learning agent that can effectively leverage its prior experience. Previous approaches have only proposed limited forms of transfer of prelearned options to new task settings. We propose a novel option indexing approach to hierarchical learning (OI-HRL), where we learn an affinity function between options and the items present in the environment. This allows us to effectively reuse a large library of pretrained options, in zero-shot generalization at test time, by restricting goal-directed learning to only those options relevant to the task at hand. We develop a meta-training loop that learns the representations of options and environments over a series of HRL problems, by incorporating feedback about the relevance of retrieved options to the higher-level goal. We evaluate OI-HRL in two simulated settings - the CraftWorld and AI2THOR environments - and show that we achieve performance competitive with oracular baselines, and substantial gains over a baseline that has the entire option pool available for learning the hierarchical policy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05750v1-abstract-full').style.display = 'none'; document.getElementById('2206.05750v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09973">arXiv:2205.09973</a> <span> [<a href="https://arxiv.org/pdf/2205.09973">pdf</a>, <a href="https://arxiv.org/format/2205.09973">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> An In-Pipe Inspection Robot With Sensorless Underactuated Magnets and Omnidirectional Tracks: Design and Implementation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saha%2C+G">Gaurav Saha</a>, <a href="/search/cs?searchtype=author&query=Santosh%2C+K+M">K. M. Santosh</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Anugu Reddy</a>, <a href="/search/cs?searchtype=author&query=Kant%2C+R">Ravi Kant</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Arjun Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09973v1-abstract-short" style="display: inline;"> This paper presents the plan of an in-pipe climbing robot that works utilizing an astute transmission part to investigate complex relationship of lines. Standard wheeled/proceeded in-pipe climbing robots are inclined to slip and take while investigating in pipe turns. The instrument helps in accomplishing the main inevitable result of getting out slip and drag in the robot tracks during advancemen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09973v1-abstract-full').style.display = 'inline'; document.getElementById('2205.09973v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09973v1-abstract-full" style="display: none;"> This paper presents the plan of an in-pipe climbing robot that works utilizing an astute transmission part to investigate complex relationship of lines. Standard wheeled/proceeded in-pipe climbing robots are inclined to slip and take while investigating in pipe turns. The instrument helps in accomplishing the main inevitable result of getting out slip and drag in the robot tracks during advancement. The proposed transmission appreciates the practical furthest reaches of the standard two-yield transmission, which is developed the basic time for a transmission with three results. The instrument conclusively changes the track speeds of the robot considering the powers applied on each track inside the line relationship, by getting out the essential for any remarkable control. The amusement of the robot crossing in the line network in various orientation and in pipe-turns without slip shows the proposed game plan's adequacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09973v1-abstract-full').style.display = 'none'; document.getElementById('2205.09973v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures. arXiv admin note: text overlap with arXiv:2201.07865</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.04073">arXiv:2202.04073</a> <span> [<a href="https://arxiv.org/pdf/2202.04073">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The EMory BrEast imaging Dataset (EMBED): A Racially Diverse, Granular Dataset of 3.5M Screening and Diagnostic Mammograms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jeong%2C+J+J">Jiwoong J. Jeong</a>, <a href="/search/cs?searchtype=author&query=Vey%2C+B+L">Brianna L. Vey</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Ananth Reddy</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+T">Thomas Kim</a>, <a href="/search/cs?searchtype=author&query=Santos%2C+T">Thiago Santos</a>, <a href="/search/cs?searchtype=author&query=Correa%2C+R">Ramon Correa</a>, <a href="/search/cs?searchtype=author&query=Dutt%2C+R">Raman Dutt</a>, <a href="/search/cs?searchtype=author&query=Mosunjac%2C+M">Marina Mosunjac</a>, <a href="/search/cs?searchtype=author&query=Oprea-Ilies%2C+G">Gabriela Oprea-Ilies</a>, <a href="/search/cs?searchtype=author&query=Smith%2C+G">Geoffrey Smith</a>, <a href="/search/cs?searchtype=author&query=Woo%2C+M">Minjae Woo</a>, <a href="/search/cs?searchtype=author&query=McAdams%2C+C+R">Christopher R. McAdams</a>, <a href="/search/cs?searchtype=author&query=Newell%2C+M+S">Mary S. Newell</a>, <a href="/search/cs?searchtype=author&query=Banerjee%2C+I">Imon Banerjee</a>, <a href="/search/cs?searchtype=author&query=Gichoya%2C+J">Judy Gichoya</a>, <a href="/search/cs?searchtype=author&query=Trivedi%2C+H">Hari Trivedi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.04073v1-abstract-short" style="display: inline;"> Developing and validating artificial intelligence models in medical imaging requires datasets that are large, granular, and diverse. To date, the majority of publicly available breast imaging datasets lack in one or more of these areas. Models trained on these data may therefore underperform on patient populations or pathologies that have not previously been encountered. The EMory BrEast imaging D… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.04073v1-abstract-full').style.display = 'inline'; document.getElementById('2202.04073v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.04073v1-abstract-full" style="display: none;"> Developing and validating artificial intelligence models in medical imaging requires datasets that are large, granular, and diverse. To date, the majority of publicly available breast imaging datasets lack in one or more of these areas. Models trained on these data may therefore underperform on patient populations or pathologies that have not previously been encountered. The EMory BrEast imaging Dataset (EMBED) addresses these gaps by providing 3650,000 2D and DBT screening and diagnostic mammograms for 116,000 women divided equally between White and African American patients. The dataset also contains 40,000 annotated lesions linked to structured imaging descriptors and 61 ground truth pathologic outcomes grouped into six severity classes. Our goal is to share this dataset with research partners to aid in development and validation of breast AI models that will serve all patients fairly and help decrease bias in medical AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.04073v1-abstract-full').style.display = 'none'; document.getElementById('2202.04073v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.07555">arXiv:2112.07555</a> <span> [<a href="https://arxiv.org/pdf/2112.07555">pdf</a>, <a href="https://arxiv.org/format/2112.07555">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Tissues and Organs">q-bio.TO</span> </div> </div> <p class="title is-5 mathjax"> Classification of histopathology images using ConvNets to detect Lupus Nephritis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Akash Gupta</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Anirudh Reddy</a>, <a href="/search/cs?searchtype=author&query=Jawahar%2C+C">CV Jawahar</a>, <a href="/search/cs?searchtype=author&query=Vinod%2C+P">PK Vinod</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.07555v1-abstract-short" style="display: inline;"> Systemic lupus erythematosus (SLE) is an autoimmune disease in which the immune system of the patient starts attacking healthy tissues of the body. Lupus Nephritis (LN) refers to the inflammation of kidney tissues resulting in renal failure due to these attacks. The International Society of Nephrology/Renal Pathology Society (ISN/RPS) has released a classification system based on various patterns… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.07555v1-abstract-full').style.display = 'inline'; document.getElementById('2112.07555v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.07555v1-abstract-full" style="display: none;"> Systemic lupus erythematosus (SLE) is an autoimmune disease in which the immune system of the patient starts attacking healthy tissues of the body. Lupus Nephritis (LN) refers to the inflammation of kidney tissues resulting in renal failure due to these attacks. The International Society of Nephrology/Renal Pathology Society (ISN/RPS) has released a classification system based on various patterns observed during renal injury in SLE. Traditional methods require meticulous pathological assessment of the renal biopsy and are time-consuming. Recently, computational techniques have helped to alleviate this issue by using virtual microscopy or Whole Slide Imaging (WSI). With the use of deep learning and modern computer vision techniques, we propose a pipeline that is able to automate the process of 1) detection of various glomeruli patterns present in these whole slide images and 2) classification of each image using the extracted glomeruli features. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.07555v1-abstract-full').style.display = 'none'; document.getElementById('2112.07555v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in the 2021 Medical Imaging meets NeurIPS Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.05746">arXiv:2112.05746</a> <span> [<a href="https://arxiv.org/pdf/2112.05746">pdf</a>, <a href="https://arxiv.org/format/2112.05746">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On Causally Disentangled Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=L%2C+B+G">Benin Godfrey L</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.05746v1-abstract-short" style="display: inline;"> Representation learners that disentangle factors of variation have already proven to be important in addressing various real world concerns such as fairness and interpretability. Initially consisting of unsupervised models with independence assumptions, more recently, weak supervision and correlated features have been explored, but without a causal view of the generative process. In contrast, we w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05746v1-abstract-full').style.display = 'inline'; document.getElementById('2112.05746v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.05746v1-abstract-full" style="display: none;"> Representation learners that disentangle factors of variation have already proven to be important in addressing various real world concerns such as fairness and interpretability. Initially consisting of unsupervised models with independence assumptions, more recently, weak supervision and correlated features have been explored, but without a causal view of the generative process. In contrast, we work under the regime of a causal generative process where generative factors are either independent or can be potentially confounded by a set of observed or unobserved confounders. We present an analysis of disentangled representations through the notion of disentangled causal process. We motivate the need for new metrics and datasets to study causal disentanglement and propose two evaluation metrics and a dataset. We show that our metrics capture the desiderata of disentangled causal process. Finally, we perform an empirical study on state of the art disentangled representation learners using our metrics and dataset to evaluate them from causal perspective. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05746v1-abstract-full').style.display = 'none'; document.getElementById('2112.05746v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://causal-disentanglement.github.io/IITH-CANDLE/ ; Accepted at the 36th AAAI Conference on Artificial Intelligence (AAAI 2022)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.14674">arXiv:2111.14674</a> <span> [<a href="https://arxiv.org/pdf/2111.14674">pdf</a>, <a href="https://arxiv.org/ps/2111.14674">ps</a>, <a href="https://arxiv.org/format/2111.14674">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Online MAP Inference and Learning for Nonsymmetric Determinantal Point Processes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Aravind Reddy</a>, <a href="/search/cs?searchtype=author&query=Rossi%2C+R+A">Ryan A. Rossi</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Z">Zhao Song</a>, <a href="/search/cs?searchtype=author&query=Rao%2C+A">Anup Rao</a>, <a href="/search/cs?searchtype=author&query=Mai%2C+T">Tung Mai</a>, <a href="/search/cs?searchtype=author&query=Lipka%2C+N">Nedim Lipka</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+G">Gang Wu</a>, <a href="/search/cs?searchtype=author&query=Koh%2C+E">Eunyee Koh</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+N">Nesreen Ahmed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.14674v1-abstract-short" style="display: inline;"> In this paper, we introduce the online and streaming MAP inference and learning problems for Non-symmetric Determinantal Point Processes (NDPPs) where data points arrive in an arbitrary order and the algorithms are constrained to use a single-pass over the data as well as sub-linear memory. The online setting has an additional requirement of maintaining a valid solution at any point in time. For s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.14674v1-abstract-full').style.display = 'inline'; document.getElementById('2111.14674v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.14674v1-abstract-full" style="display: none;"> In this paper, we introduce the online and streaming MAP inference and learning problems for Non-symmetric Determinantal Point Processes (NDPPs) where data points arrive in an arbitrary order and the algorithms are constrained to use a single-pass over the data as well as sub-linear memory. The online setting has an additional requirement of maintaining a valid solution at any point in time. For solving these new problems, we propose algorithms with theoretical guarantees, evaluate them on several real-world datasets, and show that they give comparable performance to state-of-the-art offline algorithms that store the entire data in memory and take multiple passes over it. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.14674v1-abstract-full').style.display = 'none'; document.getElementById('2111.14674v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.12490">arXiv:2111.12490</a> <span> [<a href="https://arxiv.org/pdf/2111.12490">pdf</a>, <a href="https://arxiv.org/format/2111.12490">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Matching Learned Causal Effects of Neural Networks with Domain Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kancheti%2C+S+S">Sai Srinivas Kancheti</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.12490v4-abstract-short" style="display: inline;"> A trained neural network can be interpreted as a structural causal model (SCM) that provides the effect of changing input variables on the model's output. However, if training data contains both causal and correlational relationships, a model that optimizes prediction accuracy may not necessarily learn the true causal relationships between input and output variables. On the other hand, expert user… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.12490v4-abstract-full').style.display = 'inline'; document.getElementById('2111.12490v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.12490v4-abstract-full" style="display: none;"> A trained neural network can be interpreted as a structural causal model (SCM) that provides the effect of changing input variables on the model's output. However, if training data contains both causal and correlational relationships, a model that optimizes prediction accuracy may not necessarily learn the true causal relationships between input and output variables. On the other hand, expert users often have prior knowledge of the causal relationship between certain input variables and output from domain knowledge. Therefore, we propose a regularization method that aligns the learned causal effects of a neural network with domain priors, including both direct and total causal effects. We show that this approach can generalize to different kinds of domain priors, including monotonicity of causal effect of an input variable on output or zero causal effect of a variable on output for purposes of fairness. Our experiments on twelve benchmark datasets show its utility in regularizing a neural network model to maintain desired causal effects, without compromising on accuracy. Importantly, we also show that a model thus trained is robust and gets improved accuracy on noisy inputs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.12490v4-abstract-full').style.display = 'none'; document.getElementById('2111.12490v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at International Conference on Machine Learning (ICML'22)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.04331">arXiv:2110.04331</a> <span> [<a href="https://arxiv.org/pdf/2110.04331">pdf</a>, <a href="https://arxiv.org/ps/2110.04331">ps</a>, <a href="https://arxiv.org/format/2110.04331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> MusicNet: Compact Convolutional Neural Network for Real-time Background Music Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+C+K+A">Chandan K. A. Reddy</a>, <a href="/search/cs?searchtype=author&query=Gopa%2C+V">Vishak Gopa</a>, <a href="/search/cs?searchtype=author&query=Dubey%2C+H">Harishchandra Dubey</a>, <a href="/search/cs?searchtype=author&query=Matusevych%2C+S">Sergiy Matusevych</a>, <a href="/search/cs?searchtype=author&query=Cutler%2C+R">Ross Cutler</a>, <a href="/search/cs?searchtype=author&query=Aichner%2C+R">Robert Aichner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.04331v2-abstract-short" style="display: inline;"> With the recent growth of remote work, online meetings often encounter challenging audio contexts such as background noise, music, and echo. Accurate real-time detection of music events can help to improve the user experience. In this paper, we present MusicNet, a compact neural model for detecting background music in the real-time communications pipeline. In video meetings, music frequently co-oc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.04331v2-abstract-full').style.display = 'inline'; document.getElementById('2110.04331v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.04331v2-abstract-full" style="display: none;"> With the recent growth of remote work, online meetings often encounter challenging audio contexts such as background noise, music, and echo. Accurate real-time detection of music events can help to improve the user experience. In this paper, we present MusicNet, a compact neural model for detecting background music in the real-time communications pipeline. In video meetings, music frequently co-occurs with speech and background noises, making the accurate classification quite challenging. We propose a compact convolutional neural network core preceded by an in-model featurization layer. MusicNet takes 9 seconds of raw audio as input and does not require any model-specific featurization in the product stack. We train our model on the balanced subset of the Audio Set~\cite{gemmeke2017audio} data and validate it on 1000 crowd-sourced real test clips. Finally, we compare MusicNet performance with 20 state-of-the-art models. MusicNet has a true positive rate (TPR) of 81.3% at a 0.1% false positive rate (FPR), which is significantly better than state-of-the-art models included in our study. MusicNet is also 10x smaller and has 4x faster inference than the best performing models we benchmarked. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.04331v2-abstract-full').style.display = 'none'; document.getElementById('2110.04331v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.01763">arXiv:2110.01763</a> <span> [<a href="https://arxiv.org/pdf/2110.01763">pdf</a>, <a href="https://arxiv.org/format/2110.01763">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> DNSMOS P.835: A Non-Intrusive Perceptual Objective Speech Quality Metric to Evaluate Noise Suppressors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+C+K+A">Chandan K A Reddy</a>, <a href="/search/cs?searchtype=author&query=Gopal%2C+V">Vishak Gopal</a>, <a href="/search/cs?searchtype=author&query=Cutler%2C+R">Ross Cutler</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.01763v4-abstract-short" style="display: inline;"> Human subjective evaluation is the gold standard to evaluate speech quality optimized for human perception. Perceptual objective metrics serve as a proxy for subjective scores. We have recently developed a non-intrusive speech quality metric called Deep Noise Suppression Mean Opinion Score (DNSMOS) using the scores from ITU-T Rec. P.808 subjective evaluation. The P.808 scores reflect the overall q… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.01763v4-abstract-full').style.display = 'inline'; document.getElementById('2110.01763v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.01763v4-abstract-full" style="display: none;"> Human subjective evaluation is the gold standard to evaluate speech quality optimized for human perception. Perceptual objective metrics serve as a proxy for subjective scores. We have recently developed a non-intrusive speech quality metric called Deep Noise Suppression Mean Opinion Score (DNSMOS) using the scores from ITU-T Rec. P.808 subjective evaluation. The P.808 scores reflect the overall quality of the audio clip. ITU-T Rec. P.835 subjective evaluation framework gives the standalone quality scores of speech and background noise in addition to the overall quality. In this work, we train an objective metric based on P.835 human ratings that outputs 3 scores: i) speech quality (SIG), ii) background noise quality (BAK), and iii) the overall quality (OVRL) of the audio. The developed metric is highly correlated with human ratings, with a Pearson's Correlation Coefficient (PCC)=0.94 for SIG and PCC=0.98 for BAK and OVRL. This is the first non-intrusive P.835 predictor we are aware of. DNSMOS P.835 is made publicly available as an Azure service. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.01763v4-abstract-full').style.display = 'none'; document.getElementById('2110.01763v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2010.15258</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.07184">arXiv:2107.07184</a> <span> [<a href="https://arxiv.org/pdf/2107.07184">pdf</a>, <a href="https://arxiv.org/format/2107.07184">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> MURAL: Meta-Learning Uncertainty-Aware Rewards for Outcome-Driven Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+K">Kevin Li</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Abhishek Gupta</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Ashwin Reddy</a>, <a href="/search/cs?searchtype=author&query=Pong%2C+V">Vitchyr Pong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A">Aurick Zhou</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Justin Yu</a>, <a href="/search/cs?searchtype=author&query=Levine%2C+S">Sergey Levine</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.07184v2-abstract-short" style="display: inline;"> Exploration in reinforcement learning is a challenging problem: in the worst case, the agent must search for high-reward states that could be hidden anywhere in the state space. Can we define a more tractable class of RL problems, where the agent is provided with examples of successful outcomes? In this problem setting, the reward function can be obtained automatically by training a classifier to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.07184v2-abstract-full').style.display = 'inline'; document.getElementById('2107.07184v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.07184v2-abstract-full" style="display: none;"> Exploration in reinforcement learning is a challenging problem: in the worst case, the agent must search for high-reward states that could be hidden anywhere in the state space. Can we define a more tractable class of RL problems, where the agent is provided with examples of successful outcomes? In this problem setting, the reward function can be obtained automatically by training a classifier to categorize states as successful or not. If trained properly, such a classifier can provide a well-shaped objective landscape that both promotes progress toward good states and provides a calibrated exploration bonus. In this work, we show that an uncertainty aware classifier can solve challenging reinforcement learning problems by both encouraging exploration and provided directed guidance towards positive outcomes. We propose a novel mechanism for obtaining these calibrated, uncertainty-aware classifiers based on an amortized technique for computing the normalized maximum likelihood (NML) distribution. To make this tractable, we propose a novel method for computing the NML distribution by using meta-learning. We show that the resulting algorithm has a number of intriguing connections to both count-based exploration methods and prior algorithms for learning reward functions, while also providing more effective guidance towards the goal. We demonstrate that our algorithm solves a number of challenging navigation and robotic manipulation tasks which prove difficult or impossible for prior methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.07184v2-abstract-full').style.display = 'none'; document.getElementById('2107.07184v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICML 2021. First two authors contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.06173">arXiv:2107.06173</a> <span> [<a href="https://arxiv.org/pdf/2107.06173">pdf</a>, <a href="https://arxiv.org/format/2107.06173">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSP.2020.2971936">10.1109/TSP.2020.2971936 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Orthogonal and Non-Orthogonal Signal Representations Using New Transformation Matrices Having NPM Structure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shah%2C+S+B">Shaik Basheeruddin Shah</a>, <a href="/search/cs?searchtype=author&query=Chakka%2C+V+K">Vijay Kumar Chakka</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+S">Arikatla Satyanarayana Reddy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.06173v1-abstract-short" style="display: inline;"> In this paper, we introduce two types of real-valued sums known as Complex Conjugate Pair Sums (CCPSs) denoted as CCPS$^{(1)}$ and CCPS$^{(2)}$, and discuss a few of their properties. Using each type of CCPSs and their circular shifts, we construct two non-orthogonal Nested Periodic Matrices (NPMs). As NPMs are non-singular, this introduces two non-orthogonal transforms known as Complex Conjugate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06173v1-abstract-full').style.display = 'inline'; document.getElementById('2107.06173v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.06173v1-abstract-full" style="display: none;"> In this paper, we introduce two types of real-valued sums known as Complex Conjugate Pair Sums (CCPSs) denoted as CCPS$^{(1)}$ and CCPS$^{(2)}$, and discuss a few of their properties. Using each type of CCPSs and their circular shifts, we construct two non-orthogonal Nested Periodic Matrices (NPMs). As NPMs are non-singular, this introduces two non-orthogonal transforms known as Complex Conjugate Periodic Transforms (CCPTs) denoted as CCPT$^{(1)}$ and CCPT$^{(2)}$. We propose another NPM, which uses both types of CCPSs such that its columns are mutually orthogonal, this transform is known as Orthogonal CCPT (OCCPT). After a brief study of a few OCCPT properties like periodicity, circular shift, etc., we present two different interpretations of it. Further, we propose a Decimation-In-Time (DIT) based fast computation algorithm for OCCPT (termed as FOCCPT), whenever the length of the signal is equal to $2^v,\ v{\in} \mathbb{N}$. The proposed sums and transforms are inspired by Ramanujan sums and Ramanujan Period Transform (RPT). Finally, we show that the period (both divisor and non-divisor) and frequency information of a signal can be estimated using the proposed transforms with a significant reduction in the computational complexity over Discrete Fourier Transform (DFT). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06173v1-abstract-full').style.display = 'none'; document.getElementById('2107.06173v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 5 figures,</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Signal Processing, 06 February 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.02437">arXiv:2107.02437</a> <span> [<a href="https://arxiv.org/pdf/2107.02437">pdf</a>, <a href="https://arxiv.org/ps/2107.02437">ps</a>, <a href="https://arxiv.org/format/2107.02437">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Turbo Coded Single User Massive MIMO </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vasudevan%2C+K">K. Vasudevan</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+P+K">A. Phani Kumar Reddy</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+G+K">Gyanesh Kumar Pathak</a>, <a href="/search/cs?searchtype=author&query=Albreem%2C+M">Mahmoud Albreem</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.02437v1-abstract-short" style="display: inline;"> This work deals with turbo coded single user massive multiple input multiple output (SU-MMIMO) systems, with and without precoding. SU-MMIMO has a much higher spectral efficiency compared to multi-user massive MIMO (MU-MMIMO) since independent signals are transmitted from each of the antenna elements (spatial multiplexing). MU-MMIMO that uses beamforming has a much lower spectral efficiency, since… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.02437v1-abstract-full').style.display = 'inline'; document.getElementById('2107.02437v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.02437v1-abstract-full" style="display: none;"> This work deals with turbo coded single user massive multiple input multiple output (SU-MMIMO) systems, with and without precoding. SU-MMIMO has a much higher spectral efficiency compared to multi-user massive MIMO (MU-MMIMO) since independent signals are transmitted from each of the antenna elements (spatial multiplexing). MU-MMIMO that uses beamforming has a much lower spectral efficiency, since the same signal (with a delay) is transmitted from each of the antenna elements. In this work, expressions for the upper bound on the average signal-to-noise ratio (SNR) per bit and spectral efficiency are derived for SU-MMIMO with and without precoding. We propose a performance index $f(N_t)$, which is a function of the number of transmit antennas $N_t$. Here $f(N_t)$ is the sum of the upper bound on the average SNR per bit and the spectral efficiency. We demonstrate that when the total number of antennas ($N_{\mathrm{tot}}$) in the transmitter and receiver is fixed, there exists a minimum value of $f(N_t)$, which has to be avoided. Computer simulations show that the bit-error-rate (BER) is nearly insensitive to a wide range of the number of transmit antennas and re-transmissions, when $N_{\mathrm{tot}}$ is large and kept constant. Thus, the spectral efficiency can be made as large as possible, for a given BER and $N_{\mathrm{tot}}$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.02437v1-abstract-full').style.display = 'none'; document.getElementById('2107.02437v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 8 figures, journal. arXiv admin note: substantial text overlap with arXiv:2007.15959</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.15917">arXiv:2106.15917</a> <span> [<a href="https://arxiv.org/pdf/2106.15917">pdf</a>, <a href="https://arxiv.org/format/2106.15917">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="General Economics">econ.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Explaining Caste-based Digital Divide in India </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vaidehi%2C+R">R Vaidehi</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+B">A Bheemeshwar Reddy</a>, <a href="/search/cs?searchtype=author&query=Banerjee%2C+S">Sudatta Banerjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.15917v1-abstract-short" style="display: inline;"> With the increasing importance of information and communication technologies in access to basic services like education and health, the question of the digital divide based on caste assumes importance in India where large socioeconomic disparities persist between different caste groups. Studies on caste-based digital inequality are still scanty in India. Using nationally representative survey data… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.15917v1-abstract-full').style.display = 'inline'; document.getElementById('2106.15917v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.15917v1-abstract-full" style="display: none;"> With the increasing importance of information and communication technologies in access to basic services like education and health, the question of the digital divide based on caste assumes importance in India where large socioeconomic disparities persist between different caste groups. Studies on caste-based digital inequality are still scanty in India. Using nationally representative survey data, this paper analyzes the first-level digital divide (ownership of computer and access to the internet) and the second-level digital divide (individual's skill to use computer and the internet) between the disadvantaged caste group and the others. Further, this paper identifies the caste group-based differences in socioeconomic factors that contribute to the digital divide between these groups using a non-linear decomposition method. The results show that there exists a large first-level and second-level digital divide between the disadvantaged caste groups and others in India. The non-linear decomposition results indicate that the caste-based digital divide in India is rooted in historical socioeconomic deprivation of disadvantaged caste groups. More than half of the caste-based digital gap is attributable to differences in educational attainment and income between the disadvantaged caste groups and others. The findings of this study highlight the urgent need for addressing educational and income inequality between the different caste groups in India in order to bridge the digital divide. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.15917v1-abstract-full').style.display = 'none'; document.getElementById('2106.15917v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.05842">arXiv:2106.05842</a> <span> [<a href="https://arxiv.org/pdf/2106.05842">pdf</a>, <a href="https://arxiv.org/format/2106.05842">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3461702.3462467">10.1145/3461702.3462467 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Causality in Neural Networks -- An Extended Abstract </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Reddy%2C+A+G">Abbavaram Gowtham Reddy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.05842v1-abstract-short" style="display: inline;"> Causal reasoning is the main learning and explanation tool used by humans. AI systems should possess causal reasoning capabilities to be deployed in the real world with trust and reliability. Introducing the ideas of causality to machine learning helps in providing better learning and explainable models. Explainability, causal disentanglement are some important aspects of any machine learning mode… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.05842v1-abstract-full').style.display = 'inline'; document.getElementById('2106.05842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.05842v1-abstract-full" style="display: none;"> Causal reasoning is the main learning and explanation tool used by humans. AI systems should possess causal reasoning capabilities to be deployed in the real world with trust and reliability. Introducing the ideas of causality to machine learning helps in providing better learning and explainable models. Explainability, causal disentanglement are some important aspects of any machine learning model. Causal explanations are required to believe in a model's decision and causal disentanglement learning is important for transfer learning applications. We exploit the ideas of causality to be used in deep learning models to achieve better and causally explainable models that are useful in fairness, disentangled representation, etc. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.05842v1-abstract-full').style.display = 'none'; document.getElementById('2106.05842v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.07855">arXiv:2105.07855</a> <span> [<a href="https://arxiv.org/pdf/2105.07855">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.14445/22315381/IJETT-V69I5P217">10.14445/22315381/IJETT-V69I5P217 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An Extensive Analytical Approach on Human Resources using Random Forest Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=papineni%2C+S+l+v">Swarajya lakshmi v papineni</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A+M">A. Mallikarjuna Reddy</a>, <a href="/search/cs?searchtype=author&query=yarlagadda%2C+S">Sudeepti yarlagadda</a>, <a href="/search/cs?searchtype=author&query=Yarlagadda%2C+S">Snigdha Yarlagadda</a>, <a href="/search/cs?searchtype=author&query=Akkinen%2C+H">Haritha Akkinen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.07855v1-abstract-short" style="display: inline;"> The current job survey shows that most software employees are planning to change their job role due to high pay for recent jobs such as data scientists, business analysts and artificial intelligence fields. The survey also indicated that work life imbalances, low pay, uneven shifts and many other factors also make employees think about changing their work life. In this paper, for an efficient orga… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07855v1-abstract-full').style.display = 'inline'; document.getElementById('2105.07855v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.07855v1-abstract-full" style="display: none;"> The current job survey shows that most software employees are planning to change their job role due to high pay for recent jobs such as data scientists, business analysts and artificial intelligence fields. The survey also indicated that work life imbalances, low pay, uneven shifts and many other factors also make employees think about changing their work life. In this paper, for an efficient organisation of the company in terms of human resources, the proposed system designed a model with the help of a random forest algorithm by considering different employee parameters. This helps the HR department retain the employee by identifying gaps and helping the organisation to run smoothly with a good employee retention ratio. This combination of HR and data science can help the productivity, collaboration and well-being of employees of the organisation. It also helps to develop strategies that have an impact on the performance of employees in terms of external and social factors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07855v1-abstract-full').style.display = 'none'; document.getElementById('2105.07855v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.00034">arXiv:2103.00034</a> <span> [<a href="https://arxiv.org/pdf/2103.00034">pdf</a>, <a href="https://arxiv.org/format/2103.00034">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Beyond Perturbation Stability: LP Recovery Guarantees for MAP Inference on Noisy Stable Instances </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lang%2C+H">Hunter Lang</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+A">Aravind Reddy</a>, <a href="/search/cs?searchtype=author&query=Sontag%2C+D">David Sontag</a>, <a href="/search/cs?searchtype=author&query=Vijayaraghavan%2C+A">Aravindan Vijayaraghavan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.00034v1-abstract-short" style="display: inline;"> Several works have shown that perturbation stable instances of the MAP inference problem in Potts models can be solved exactly using a natural linear programming (LP) relaxation. However, most of these works give few (or no) guarantees for the LP solutions on instances that do not satisfy the relatively strict perturbation stability definitions. In this work, we go beyond these stability results b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00034v1-abstract-full').style.display = 'inline'; document.getElementById('2103.00034v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.00034v1-abstract-full" style="display: none;"> Several works have shown that perturbation stable instances of the MAP inference problem in Potts models can be solved exactly using a natural linear programming (LP) relaxation. However, most of these works give few (or no) guarantees for the LP solutions on instances that do not satisfy the relatively strict perturbation stability definitions. In this work, we go beyond these stability results by showing that the LP approximately recovers the MAP solution of a stable instance even after the instance is corrupted by noise. This "noisy stable" model realistically fits with practical MAP inference problems: we design an algorithm for finding "close" stable instances, and show that several real-world instances from computer vision have nearby instances that are perturbation stable. These results suggest a new theoretical explanation for the excellent performance of this LP relaxation in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00034v1-abstract-full').style.display = 'none'; document.getElementById('2103.00034v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 2 figures, 2 tables. To appear in AISTATS 2021</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Reddy%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Reddy%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Reddy%2C+A&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository