Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 319 results for author: <span class="mathjax">Gupta, P</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Gupta%2C+P">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Gupta, P"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Gupta%2C+P&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Gupta, P"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.20311">arXiv:2502.20311</a> <span> [<a href="https://arxiv.org/pdf/2502.20311">pdf</a>, <a href="https://arxiv.org/format/2502.20311">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Adapting Automatic Speech Recognition for Accented Air Traffic Control Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wee%2C+M+Y+Z">Marcus Yu Zhe Wee</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+J+J+H">Justin Juin Hng Wong</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+L">Lynus Lim</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+J+Y+W">Joe Yu Wei Tan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prannaya Gupta</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+D">Dillion Lim</a>, <a href="/search/cs?searchtype=author&query=Tew%2C+E+H">En Hao Tew</a>, <a href="/search/cs?searchtype=author&query=Han%2C+A+K+S">Aloysius Keng Siew Han</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+Y+Z">Yong Zhi Lim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.20311v1-abstract-short" style="display: inline;"> Effective communication in Air Traffic Control (ATC) is critical to maintaining aviation safety, yet the challenges posed by accented English remain largely unaddressed in Automatic Speech Recognition (ASR) systems. Existing models struggle with transcription accuracy for Southeast Asian-accented (SEA-accented) speech, particularly in noisy ATC environments. This study presents the development of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20311v1-abstract-full').style.display = 'inline'; document.getElementById('2502.20311v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.20311v1-abstract-full" style="display: none;"> Effective communication in Air Traffic Control (ATC) is critical to maintaining aviation safety, yet the challenges posed by accented English remain largely unaddressed in Automatic Speech Recognition (ASR) systems. Existing models struggle with transcription accuracy for Southeast Asian-accented (SEA-accented) speech, particularly in noisy ATC environments. This study presents the development of ASR models fine-tuned specifically for Southeast Asian accents using a newly created dataset. Our research achieves significant improvements, achieving a Word Error Rate (WER) of 0.0982 or 9.82% on SEA-accented ATC speech. Additionally, the paper highlights the importance of region-specific datasets and accent-focused training, offering a pathway for deploying ASR systems in resource-constrained military operations. The findings emphasize the need for noise-robust training techniques and region-specific datasets to improve transcription accuracy for non-Western accents in ATC communications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.20311v1-abstract-full').style.display = 'none'; document.getElementById('2502.20311v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06493">arXiv:2502.06493</a> <span> [<a href="https://arxiv.org/pdf/2502.06493">pdf</a>, <a href="https://arxiv.org/format/2502.06493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> EdgeMLBalancer: A Self-Adaptive Approach for Dynamic Model Switching on Resource-Constrained Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Matathammal%2C+A">Akhila Matathammal</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+K">Kriti Gupta</a>, <a href="/search/cs?searchtype=author&query=Lavanya%2C+L">Larissa Lavanya</a>, <a href="/search/cs?searchtype=author&query=Halgatti%2C+A+V">Ananya Vishal Halgatti</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyanshi Gupta</a>, <a href="/search/cs?searchtype=author&query=Vaidhyanathan%2C+K">Karthik Vaidhyanathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06493v1-abstract-short" style="display: inline;"> The widespread adoption of machine learning on edge devices, such as mobile phones, laptops, IoT devices, etc., has enabled real-time AI applications in resource-constrained environments. Existing solutions for managing computational resources often focus narrowly on accuracy or energy efficiency, failing to adapt dynamically to varying workloads. Furthermore, the existing system lack robust mecha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06493v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06493v1-abstract-full" style="display: none;"> The widespread adoption of machine learning on edge devices, such as mobile phones, laptops, IoT devices, etc., has enabled real-time AI applications in resource-constrained environments. Existing solutions for managing computational resources often focus narrowly on accuracy or energy efficiency, failing to adapt dynamically to varying workloads. Furthermore, the existing system lack robust mechanisms to adaptively balance CPU utilization, leading to inefficiencies in resource-constrained scenarios like real-time traffic monitoring. To address these limitations, we propose a self-adaptive approach that optimizes CPU utilization and resource management on edge devices. Our approach, EdgeMLBalancer balances between models through dynamic switching, guided by real-time CPU usage monitoring across processor cores. Tested on real-time traffic data, the approach adapts object detection models based on CPU usage, ensuring efficient resource utilization. The approach leverages epsilon-greedy strategy which promotes fairness and prevents resource starvation, maintaining system robustness. The results of our evaluation demonstrate significant improvements by balancing computational efficiency and accuracy, highlighting the approach's ability to adapt seamlessly to varying workloads. This work lays the groundwork for further advancements in self-adaptation for resource-constrained environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06493v1-abstract-full').style.display = 'none'; document.getElementById('2502.06493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05352">arXiv:2502.05352</a> <span> [<a href="https://arxiv.org/pdf/2502.05352">pdf</a>, <a href="https://arxiv.org/format/2502.05352">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> ITBench: Evaluating AI Agents across Diverse Real-World IT Automation Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jha%2C+S">Saurabh Jha</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+R">Rohan Arora</a>, <a href="/search/cs?searchtype=author&query=Watanabe%2C+Y">Yuji Watanabe</a>, <a href="/search/cs?searchtype=author&query=Yanagawa%2C+T">Takumi Yanagawa</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yinfang Chen</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J">Jackson Clark</a>, <a href="/search/cs?searchtype=author&query=Bhavya%2C+B">Bhavya Bhavya</a>, <a href="/search/cs?searchtype=author&query=Verma%2C+M">Mudit Verma</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+H">Harshit Kumar</a>, <a href="/search/cs?searchtype=author&query=Kitahara%2C+H">Hirokuni Kitahara</a>, <a href="/search/cs?searchtype=author&query=Zheutlin%2C+N">Noah Zheutlin</a>, <a href="/search/cs?searchtype=author&query=Takano%2C+S">Saki Takano</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+D">Divya Pathak</a>, <a href="/search/cs?searchtype=author&query=George%2C+F">Felix George</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xinbo Wu</a>, <a href="/search/cs?searchtype=author&query=Turkkan%2C+B+O">Bekir O. Turkkan</a>, <a href="/search/cs?searchtype=author&query=Vanloo%2C+G">Gerard Vanloo</a>, <a href="/search/cs?searchtype=author&query=Nidd%2C+M">Michael Nidd</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+T">Ting Dai</a>, <a href="/search/cs?searchtype=author&query=Chatterjee%2C+O">Oishik Chatterjee</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranjal Gupta</a>, <a href="/search/cs?searchtype=author&query=Samanta%2C+S">Suranjana Samanta</a>, <a href="/search/cs?searchtype=author&query=Aggarwal%2C+P">Pooja Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+R">Rong Lee</a>, <a href="/search/cs?searchtype=author&query=Murali%2C+P">Pavankumar Murali</a> , et al. (18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05352v1-abstract-short" style="display: inline;"> Realizing the vision of using AI agents to automate critical IT tasks depends on the ability to measure and understand effectiveness of proposed solutions. We introduce ITBench, a framework that offers a systematic methodology for benchmarking AI agents to address real-world IT automation tasks. Our initial release targets three key areas: Site Reliability Engineering (SRE), Compliance and Securit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05352v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05352v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05352v1-abstract-full" style="display: none;"> Realizing the vision of using AI agents to automate critical IT tasks depends on the ability to measure and understand effectiveness of proposed solutions. We introduce ITBench, a framework that offers a systematic methodology for benchmarking AI agents to address real-world IT automation tasks. Our initial release targets three key areas: Site Reliability Engineering (SRE), Compliance and Security Operations (CISO), and Financial Operations (FinOps). The design enables AI researchers to understand the challenges and opportunities of AI agents for IT automation with push-button workflows and interpretable metrics. ITBench includes an initial set of 94 real-world scenarios, which can be easily extended by community contributions. Our results show that agents powered by state-of-the-art models resolve only 13.8% of SRE scenarios, 25.2% of CISO scenarios, and 0% of FinOps scenarios. We expect ITBench to be a key enabler of AI-driven IT automation that is correct, safe, and fast. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05352v1-abstract-full').style.display = 'none'; document.getElementById('2502.05352v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18919">arXiv:2501.18919</a> <span> [<a href="https://arxiv.org/pdf/2501.18919">pdf</a>, <a href="https://arxiv.org/format/2501.18919">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Deepfake Detection of Singing Voices With Whisper Encodings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+F">Falguni Sharma</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyanka Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18919v1-abstract-short" style="display: inline;"> The deepfake generation of singing vocals is a concerning issue for artists in the music industry. In this work, we propose a singing voice deepfake detection (SVDD) system, which uses noise-variant encodings of open-AI's Whisper model. As counter-intuitive as it may sound, even though the Whisper model is known to be noise-robust, the encodings are rich in non-speech information, and are noise-va… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18919v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18919v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18919v1-abstract-full" style="display: none;"> The deepfake generation of singing vocals is a concerning issue for artists in the music industry. In this work, we propose a singing voice deepfake detection (SVDD) system, which uses noise-variant encodings of open-AI's Whisper model. As counter-intuitive as it may sound, even though the Whisper model is known to be noise-robust, the encodings are rich in non-speech information, and are noise-variant. This leads us to evaluate Whisper encodings as feature representations for the SVDD task. Therefore, in this work, the SVDD task is performed on vocals and mixtures, and the performance is evaluated in \%EER over varying Whisper model sizes and two classifiers- CNN and ResNet34, under different testing conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18919v1-abstract-full').style.display = 'none'; document.getElementById('2501.18919v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in ICASSP,2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16997">arXiv:2501.16997</a> <span> [<a href="https://arxiv.org/pdf/2501.16997">pdf</a>, <a href="https://arxiv.org/format/2501.16997">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> MAUCell: An Adaptive Multi-Attention Framework for Video Frame Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+S">Shreyam Gupta</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+P">P. Agrawal</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyam Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16997v1-abstract-short" style="display: inline;"> Temporal sequence modeling stands as the fundamental foundation for video prediction systems and real-time forecasting operations as well as anomaly detection applications. The achievement of accurate predictions through efficient resource consumption remains an ongoing issue in contemporary temporal sequence modeling. We introduce the Multi-Attention Unit (MAUCell) which combines Generative Adver… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16997v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16997v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16997v1-abstract-full" style="display: none;"> Temporal sequence modeling stands as the fundamental foundation for video prediction systems and real-time forecasting operations as well as anomaly detection applications. The achievement of accurate predictions through efficient resource consumption remains an ongoing issue in contemporary temporal sequence modeling. We introduce the Multi-Attention Unit (MAUCell) which combines Generative Adversarial Networks (GANs) and spatio-temporal attention mechanisms to improve video frame prediction capabilities. Our approach implements three types of attention models to capture intricate motion sequences. A dynamic combination of these attention outputs allows the model to reach both advanced decision accuracy along with superior quality while remaining computationally efficient. The integration of GAN elements makes generated frames appear more true to life therefore the framework creates output sequences which mimic real-world footage. The new design system maintains equilibrium between temporal continuity and spatial accuracy to deliver reliable video prediction. Through a comprehensive evaluation methodology which merged the perceptual LPIPS measurement together with classic tests MSE, MAE, SSIM and PSNR exhibited enhancing capabilities than contemporary approaches based on direct benchmark tests of Moving MNIST, KTH Action, and CASIA-B (Preprocessed) datasets. Our examination indicates that MAUCell shows promise for operational time requirements. The research findings demonstrate how GANs work best with attention mechanisms to create better applications for predicting video sequences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16997v1-abstract-full').style.display = 'none'; document.getElementById('2501.16997v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IJCAI 2025 Conference for review. It contains: 11 pages, 4 figures, 7 tables, and 3 Algorithms</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16539">arXiv:2501.16539</a> <span> [<a href="https://arxiv.org/pdf/2501.16539">pdf</a>, <a href="https://arxiv.org/format/2501.16539">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Generalized Mission Planning for Heterogeneous Multi-Robot Teams via LLM-constructed Hierarchical Trees </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Piyush Gupta</a>, <a href="/search/cs?searchtype=author&query=Isele%2C+D">David Isele</a>, <a href="/search/cs?searchtype=author&query=Sachdeva%2C+E">Enna Sachdeva</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+P">Pin-Hao Huang</a>, <a href="/search/cs?searchtype=author&query=Dariush%2C+B">Behzad Dariush</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kwonjoon Lee</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+S">Sangjae Bae</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16539v1-abstract-short" style="display: inline;"> We present a novel mission-planning strategy for heterogeneous multi-robot teams, taking into account the specific constraints and capabilities of each robot. Our approach employs hierarchical trees to systematically break down complex missions into manageable sub-tasks. We develop specialized APIs and tools, which are utilized by Large Language Models (LLMs) to efficiently construct these hierarc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16539v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16539v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16539v1-abstract-full" style="display: none;"> We present a novel mission-planning strategy for heterogeneous multi-robot teams, taking into account the specific constraints and capabilities of each robot. Our approach employs hierarchical trees to systematically break down complex missions into manageable sub-tasks. We develop specialized APIs and tools, which are utilized by Large Language Models (LLMs) to efficiently construct these hierarchical trees. Once the hierarchical tree is generated, it is further decomposed to create optimized schedules for each robot, ensuring adherence to their individual constraints and capabilities. We demonstrate the effectiveness of our framework through detailed examples covering a wide range of missions, showcasing its flexibility and scalability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16539v1-abstract-full').style.display = 'none'; document.getElementById('2501.16539v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08990">arXiv:2501.08990</a> <span> [<a href="https://arxiv.org/pdf/2501.08990">pdf</a>, <a href="https://arxiv.org/format/2501.08990">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> 3GPP Network Architecture Enhancement for Ambient IoT Service </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongjoo Kim</a>, <a href="/search/cs?searchtype=author&query=Godin%2C+P">Philippe Godin</a>, <a href="/search/cs?searchtype=author&query=Bjerrum%2C+B">Bo Bjerrum</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pallab Gupta</a>, <a href="/search/cs?searchtype=author&query=Butt%2C+M+M">M. Majid Butt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08990v1-abstract-short" style="display: inline;"> Ambient internet of things (A-IoT) paradigm is under study in 3GPP with the intention to provide a sustainable solution for the IoT market without any need to replace the batteries and operate in harsh environments where it is difficult to replenish batteries. This article provides insight on 3rd Generation Partnership Project (3GPP) discussions in Release 18 and 19 with the focus on network archi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08990v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08990v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08990v1-abstract-full" style="display: none;"> Ambient internet of things (A-IoT) paradigm is under study in 3GPP with the intention to provide a sustainable solution for the IoT market without any need to replace the batteries and operate in harsh environments where it is difficult to replenish batteries. This article provides insight on 3rd Generation Partnership Project (3GPP) discussions in Release 18 and 19 with the focus on network architecture aspects. 3GPP has recently decided to start normative work in its Radio Access Network (RAN) Working Group (WG) and discussions are ongoing to start a work item in other WGs with more focus on architecture aspects. We explore and analyze various aspects of system design related to architecture requirements to support A-IoT service, different architecture options to consider, security and authentication mechanisms for A-IoT devices as well as key challenges for standardization of A-IoT service. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08990v1-abstract-full').style.display = 'none'; document.getElementById('2501.08990v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00029">arXiv:2501.00029</a> <span> [<a href="https://arxiv.org/pdf/2501.00029">pdf</a>, <a href="https://arxiv.org/format/2501.00029">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Breadth-First Catalog of Text Processing, Speech Processing and Multimodal Research in South Asian Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranav Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00029v1-abstract-short" style="display: inline;"> We review the recent literature (January 2022- October 2024) in South Asian languages on text-based language processing, multimodal models, and speech processing, and provide a spotlight analysis focused on 21 low-resource South Asian languages, namely Saraiki, Assamese, Balochi, Bhojpuri, Bodo, Burmese, Chhattisgarhi, Dhivehi, Gujarati, Kannada, Kashmiri, Konkani, Khasi, Malayalam, Meitei, Nepali… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00029v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00029v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00029v1-abstract-full" style="display: none;"> We review the recent literature (January 2022- October 2024) in South Asian languages on text-based language processing, multimodal models, and speech processing, and provide a spotlight analysis focused on 21 low-resource South Asian languages, namely Saraiki, Assamese, Balochi, Bhojpuri, Bodo, Burmese, Chhattisgarhi, Dhivehi, Gujarati, Kannada, Kashmiri, Konkani, Khasi, Malayalam, Meitei, Nepali, Odia, Pashto, Rajasthani, Sindhi, and Telugu. We identify trends, challenges, and future research directions, using a step-wise approach that incorporates relevance classification and clustering based on large language models (LLMs). Our goal is to provide a breadth-first overview of the recent developments in South Asian language technologies to NLP researchers interested in working with South Asian languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00029v1-abstract-full').style.display = 'none'; document.getElementById('2501.00029v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18956">arXiv:2412.18956</a> <span> [<a href="https://arxiv.org/pdf/2412.18956">pdf</a>, <a href="https://arxiv.org/format/2412.18956">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Musings About the Future of Search: A Return to the Past? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jimmy Lin</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pankaj Gupta</a>, <a href="/search/cs?searchtype=author&query=Horn%2C+W">Will Horn</a>, <a href="/search/cs?searchtype=author&query=Mishne%2C+G">Gilad Mishne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18956v1-abstract-short" style="display: inline;"> When you have a question, the most effective way to have the question answered is to directly connect with experts on the topic and have a conversation with them. Prior to the invention of writing, this was the only way. Although effective, this solution exhibits scalability challenges. Writing allowed knowledge to be materialized, preserved, and replicated, enabling the development of different t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18956v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18956v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18956v1-abstract-full" style="display: none;"> When you have a question, the most effective way to have the question answered is to directly connect with experts on the topic and have a conversation with them. Prior to the invention of writing, this was the only way. Although effective, this solution exhibits scalability challenges. Writing allowed knowledge to be materialized, preserved, and replicated, enabling the development of different technologies over the centuries to connect information seekers with relevant information. This progression ultimately culminated in the ten-blue-links web search paradigm we're familiar with, just before the recent emergence of generative AI. However, we often forget that consuming static content is an imperfect solution. With the advent of large language models, it has become possible to develop a superior experience by allowing users to directly engage with experts. These interactions can of course satisfy information needs, but expert models can do so much more. This coming future requires reimagining search. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18956v1-abstract-full').style.display = 'none'; document.getElementById('2412.18956v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16926">arXiv:2412.16926</a> <span> [<a href="https://arxiv.org/pdf/2412.16926">pdf</a>, <a href="https://arxiv.org/format/2412.16926">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Revisiting In-Context Learning with Long Context Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Baek%2C+J">Jinheon Baek</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S+J">Sun Jae Lee</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prakhar Gupta</a>, <a href="/search/cs?searchtype=author&query=Oh%2C+G">Geunseob Oh</a>, <a href="/search/cs?searchtype=author&query=Dalmia%2C+S">Siddharth Dalmia</a>, <a href="/search/cs?searchtype=author&query=Kolhar%2C+P">Prateek Kolhar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16926v2-abstract-short" style="display: inline;"> In-Context Learning (ICL) is a technique by which language models make predictions based on examples provided in their input context. Previously, their context window size imposed a limit on the number of examples that can be shown, making example selection techniques crucial for identifying the maximally effective set of examples. However, the recent advent of Long Context Language Models (LCLMs)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16926v2-abstract-full').style.display = 'inline'; document.getElementById('2412.16926v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16926v2-abstract-full" style="display: none;"> In-Context Learning (ICL) is a technique by which language models make predictions based on examples provided in their input context. Previously, their context window size imposed a limit on the number of examples that can be shown, making example selection techniques crucial for identifying the maximally effective set of examples. However, the recent advent of Long Context Language Models (LCLMs) has significantly increased the number of examples that can be included in context, raising an important question of whether ICL performance in a many-shot regime is still sensitive to the method of sample selection. To answer this, we revisit these approaches in the context of LCLMs through extensive experiments on 18 datasets spanning 4 tasks. Surprisingly, we observe that sophisticated example selection techniques do not yield significant improvements over a simple random sample selection method. Instead, we find that the advent of LCLMs has fundamentally shifted the challenge of ICL from that of selecting the most effective examples to that of collecting sufficient examples to fill the context window. Specifically, in certain datasets, including all available examples does not fully utilize the context window; however, by augmenting the examples in context with a simple data augmentation approach, we substantially improve ICL performance by 5%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16926v2-abstract-full').style.display = 'none'; document.getElementById('2412.16926v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07332">arXiv:2412.07332</a> <span> [<a href="https://arxiv.org/pdf/2412.07332">pdf</a>, <a href="https://arxiv.org/format/2412.07332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.oceaneng.2024.119164">10.1016/j.oceaneng.2024.119164 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Model predictive control-based trajectory generation for agile landing of unmanned aerial vehicle on a moving boat </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Proch%C3%A1zka%2C+O">Ond艡ej Proch谩zka</a>, <a href="/search/cs?searchtype=author&query=Nov%C3%A1k%2C+F">Filip Nov谩k</a>, <a href="/search/cs?searchtype=author&query=B%C3%A1%C4%8Da%2C+T">Tom谩拧 B谩膷a</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P+M">Parakh M. Gupta</a>, <a href="/search/cs?searchtype=author&query=P%C4%9Bni%C4%8Dka%2C+R">Robert P臎ni膷ka</a>, <a href="/search/cs?searchtype=author&query=Saska%2C+M">Martin Saska</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07332v1-abstract-short" style="display: inline;"> This paper proposes a novel trajectory generation method based on Model Predictive Control (MPC) for agile landing of an Unmanned Aerial Vehicle (UAV) onto an Unmanned Surface Vehicle (USV)'s deck in harsh conditions. The trajectory generation exploits the state predictions of the USV to create periodically updated trajectories for a multirotor UAV to precisely land on the deck of a moving USV eve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07332v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07332v1-abstract-full" style="display: none;"> This paper proposes a novel trajectory generation method based on Model Predictive Control (MPC) for agile landing of an Unmanned Aerial Vehicle (UAV) onto an Unmanned Surface Vehicle (USV)'s deck in harsh conditions. The trajectory generation exploits the state predictions of the USV to create periodically updated trajectories for a multirotor UAV to precisely land on the deck of a moving USV even in cases where the deck's inclination is continuously changing. We use an MPC-based scheme to create trajectories that consider both the UAV dynamics and the predicted states of the USV up to the first derivative of position and orientation. Compared to existing approaches, our method dynamically modifies the penalization matrices to precisely follow the corresponding states with respect to the flight phase. Especially during the landing maneuver, the UAV synchronizes attitude with the USV's, allowing for fast landing on a tilted deck. Simulations show the method's reliability in various sea conditions up to Rough sea (wave height 4 m), outperforming state-of-the-art methods in landing speed and accuracy, with twice the precision on average. Finally, real-world experiments validate the simulation results, demonstrating robust landings on a moving USV, while all computations are performed in real-time onboard the UAV. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07332v1-abstract-full').style.display = 'none'; document.getElementById('2412.07332v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 17 figures, Ocean Engineering</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Ocean Engineering 313:119164, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00374">arXiv:2412.00374</a> <span> [<a href="https://arxiv.org/pdf/2412.00374">pdf</a>, <a href="https://arxiv.org/format/2412.00374">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LQ-Adapter: ViT-Adapter with Learnable Queries for Gallbladder Cancer Detection from Ultrasound Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Madan%2C+C">Chetan Madan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Mayuna Gupta</a>, <a href="/search/cs?searchtype=author&query=Basu%2C+S">Soumen Basu</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pankaj Gupta</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+C">Chetan Arora</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.00374v1-abstract-short" style="display: inline;"> We focus on the problem of Gallbladder Cancer (GBC) detection from Ultrasound (US) images. The problem presents unique challenges to modern Deep Neural Network (DNN) techniques due to low image quality arising from noise, textures, and viewpoint variations. Tackling such challenges would necessitate precise localization performance by the DNN to identify the discerning features for the downstream… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00374v1-abstract-full').style.display = 'inline'; document.getElementById('2412.00374v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.00374v1-abstract-full" style="display: none;"> We focus on the problem of Gallbladder Cancer (GBC) detection from Ultrasound (US) images. The problem presents unique challenges to modern Deep Neural Network (DNN) techniques due to low image quality arising from noise, textures, and viewpoint variations. Tackling such challenges would necessitate precise localization performance by the DNN to identify the discerning features for the downstream malignancy prediction. While several techniques have been proposed in the recent years for the problem, all of these methods employ complex custom architectures. Inspired by the success of foundational models for natural image tasks, along with the use of adapters to fine-tune such models for the custom tasks, we investigate the merit of one such design, ViT-Adapter, for the GBC detection problem. We observe that ViT-Adapter relies predominantly on a primitive CNN-based spatial prior module to inject the localization information via cross-attention, which is inefficient for our problem due to the small pathology sizes, and variability in their appearances due to non-regular structure of the malignancy. In response, we propose, LQ-Adapter, a modified Adapter design for ViT, which improves localization information by leveraging learnable content queries over the basic spatial prior module. Our method surpasses existing approaches, enhancing the mean IoU (mIoU) scores by 5.4%, 5.8%, and 2.7% over ViT-Adapters, DINO, and FocalNet-DINO, respectively on the US image-based GBC detection dataset, and establishing a new state-of-the-art (SOTA). Additionally, we validate the applicability and effectiveness of LQ-Adapter on the Kvasir-Seg dataset for polyp detection from colonoscopy images. Superior performance of our design on this problem as well showcases its capability to handle diverse medical imaging tasks across different datasets. Code is released at https://github.com/ChetanMadan/LQ-Adapter <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.00374v1-abstract-full').style.display = 'none'; document.getElementById('2412.00374v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at WACV 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11581">arXiv:2411.11581</a> <span> [<a href="https://arxiv.org/pdf/2411.11581">pdf</a>, <a href="https://arxiv.org/format/2411.11581">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> OASIS: Open Agent Social Interaction Simulations with One Million Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ziyi Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zaibin Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zirui Zheng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuxian Jiang</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+Z">Ziyue Gan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Z">Zijian Ling</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jinsong Chen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+M">Martz Ma</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+B">Bowen Dong</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prateek Gupta</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shuyue Hu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Z">Zhenfei Yin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Guohao Li</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+X">Xu Jia</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lijun Wang</a>, <a href="/search/cs?searchtype=author&query=Ghanem%2C+B">Bernard Ghanem</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+H">Huchuan Lu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chaochao Lu</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/cs?searchtype=author&query=Torr%2C+P">Philip Torr</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+J">Jing Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11581v4-abstract-short" style="display: inline;"> There has been a growing interest in enhancing rule-based agent-based models (ABMs) for social media platforms (i.e., X, Reddit) with more realistic large language model (LLM) agents, thereby allowing for a more nuanced study of complex systems. As a result, several LLM-based ABMs have been proposed in the past year. While they hold promise, each simulator is specifically designed to study a parti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11581v4-abstract-full').style.display = 'inline'; document.getElementById('2411.11581v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11581v4-abstract-full" style="display: none;"> There has been a growing interest in enhancing rule-based agent-based models (ABMs) for social media platforms (i.e., X, Reddit) with more realistic large language model (LLM) agents, thereby allowing for a more nuanced study of complex systems. As a result, several LLM-based ABMs have been proposed in the past year. While they hold promise, each simulator is specifically designed to study a particular scenario, making it time-consuming and resource-intensive to explore other phenomena using the same ABM. Additionally, these models simulate only a limited number of agents, whereas real-world social media platforms involve millions of users. To this end, we propose OASIS, a generalizable and scalable social media simulator. OASIS is designed based on real-world social media platforms, incorporating dynamically updated environments (i.e., dynamic social networks and post information), diverse action spaces (i.e., following, commenting), and recommendation systems (i.e., interest-based and hot-score-based). Additionally, OASIS supports large-scale user simulations, capable of modeling up to one million users. With these features, OASIS can be easily extended to different social media platforms to study large-scale group phenomena and behaviors. We replicate various social phenomena, including information spreading, group polarization, and herd effects across X and Reddit platforms. Moreover, we provide observations of social phenomena at different agent group scales. We observe that the larger agent group scale leads to more enhanced group dynamics and more diverse and helpful agents' opinions. These findings demonstrate OASIS's potential as a powerful tool for studying complex systems in digital environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11581v4-abstract-full').style.display = 'none'; document.getElementById('2411.11581v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04838">arXiv:2411.04838</a> <span> [<a href="https://arxiv.org/pdf/2411.04838">pdf</a>, <a href="https://arxiv.org/format/2411.04838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Theory">hep-th</span> </div> </div> <p class="title is-5 mathjax"> Machine learning and optimization-based approaches to duality in statistical physics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ferrari%2C+A+E+V">Andrea E. V. Ferrari</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prateek Gupta</a>, <a href="/search/cs?searchtype=author&query=Iqbal%2C+N">Nabil Iqbal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04838v1-abstract-short" style="display: inline;"> The notion of duality -- that a given physical system can have two different mathematical descriptions -- is a key idea in modern theoretical physics. Establishing a duality in lattice statistical mechanics models requires the construction of a dual Hamiltonian and a map from the original to the dual observables. By using simple neural networks to parameterize these maps and introducing a loss fun… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04838v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04838v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04838v1-abstract-full" style="display: none;"> The notion of duality -- that a given physical system can have two different mathematical descriptions -- is a key idea in modern theoretical physics. Establishing a duality in lattice statistical mechanics models requires the construction of a dual Hamiltonian and a map from the original to the dual observables. By using simple neural networks to parameterize these maps and introducing a loss function that penalises the difference between correlation functions in original and dual models, we formulate the process of duality discovery as an optimization problem. We numerically solve this problem and show that our framework can rediscover the celebrated Kramers-Wannier duality for the 2d Ising model, reconstructing the known mapping of temperatures. We also discuss an alternative approach which uses known features of the mapping of topological lines to reduce the problem to optimizing the couplings in a dual Hamiltonian, and explore next-to-nearest neighbour deformations of the 2d Ising duality. We discuss future directions and prospects for discovering new dualities within this framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04838v1-abstract-full').style.display = 'none'; document.getElementById('2411.04838v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages + appendices, lots of plots</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02858">arXiv:2411.02858</a> <span> [<a href="https://arxiv.org/pdf/2411.02858">pdf</a>, <a href="https://arxiv.org/format/2411.02858">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OLAF: A Plug-and-Play Framework for Enhanced Multi-object Multi-part Scene Parsing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranav Gupta</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+R">Rishubh Singh</a>, <a href="/search/cs?searchtype=author&query=Shenoy%2C+P">Pradeep Shenoy</a>, <a href="/search/cs?searchtype=author&query=Sarvadevabhatla%2C+R">Ravikiran Sarvadevabhatla</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02858v1-abstract-short" style="display: inline;"> Multi-object multi-part scene segmentation is a challenging task whose complexity scales exponentially with part granularity and number of scene objects. To address the task, we propose a plug-and-play approach termed OLAF. First, we augment the input (RGB) with channels containing object-based structural cues (fg/bg mask, boundary edge mask). We propose a weight adaptation technique which enables… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02858v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02858v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02858v1-abstract-full" style="display: none;"> Multi-object multi-part scene segmentation is a challenging task whose complexity scales exponentially with part granularity and number of scene objects. To address the task, we propose a plug-and-play approach termed OLAF. First, we augment the input (RGB) with channels containing object-based structural cues (fg/bg mask, boundary edge mask). We propose a weight adaptation technique which enables regular (RGB) pre-trained models to process the augmented (5-channel) input in a stable manner during optimization. In addition, we introduce an encoder module termed LDF to provide low-level dense feature guidance. This assists segmentation, particularly for smaller parts. OLAF enables significant mIoU gains of $\mathbf{3.3}$ (Pascal-Parts-58), $\mathbf{3.5}$ (Pascal-Parts-108) over the SOTA model. On the most challenging variant (Pascal-Parts-201), the gain is $\mathbf{4.0}$. Experimentally, we show that OLAF's broad applicability enables gains across multiple architectures (CNN, U-Net, Transformer) and datasets. The code is available at olafseg.github.io <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02858v1-abstract-full').style.display = 'none'; document.getElementById('2411.02858v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in The European Conference on Computer Vision (ECCV) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01482">arXiv:2411.01482</a> <span> [<a href="https://arxiv.org/pdf/2411.01482">pdf</a>, <a href="https://arxiv.org/format/2411.01482">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Metric Geometry">math.MG</span> </div> </div> <p class="title is-5 mathjax"> Membership Queries for Convex Floating Bodies via Hilbert Geometry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Purvi Gupta</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+A">Anant Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01482v1-abstract-short" style="display: inline;"> We propose the convex floating body membership problem, which consists of efficiently determining when a query point $a\in\mathbb{R}^d$ belongs to the so-called $\varepsilon$-convex floating body of a given bounded convex domain $K\subset\mathbb{R}^d$. We consider this problem in an approximate setting, i.e., given a parameter $未>0$, the query can be answered either way if the Hilbert distance in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01482v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01482v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01482v1-abstract-full" style="display: none;"> We propose the convex floating body membership problem, which consists of efficiently determining when a query point $a\in\mathbb{R}^d$ belongs to the so-called $\varepsilon$-convex floating body of a given bounded convex domain $K\subset\mathbb{R}^d$. We consider this problem in an approximate setting, i.e., given a parameter $未>0$, the query can be answered either way if the Hilbert distance in $K$ of $a$ from the boundary of a relatively-scaled $\varepsilon$-convex floating body is less than $未$. We present a data structure for this problem that has storage size $O(未^{-d}\varepsilon^{-(d-1)/2})$ and achieves query time of $O({未^{-1}}\ln 1/\varepsilon)$. Our construction is motivated by a recent work of Abdelkader and Mount on APM queries, and relies on a comparison of convex floating bodies with balls in the Hilbert metric on $K$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01482v1-abstract-full').style.display = 'none'; document.getElementById('2411.01482v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21892">arXiv:2410.21892</a> <span> [<a href="https://arxiv.org/pdf/2410.21892">pdf</a>, <a href="https://arxiv.org/format/2410.21892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Guided Diffusion-based Counterfactual Augmentation for Robust Session-based Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Muskan Gupta</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyanka Gupta</a>, <a href="/search/cs?searchtype=author&query=Vig%2C+L">Lovekesh Vig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21892v1-abstract-short" style="display: inline;"> Session-based recommendation (SR) models aim to recommend top-K items to a user, based on the user's behaviour during the current session. Several SR models are proposed in the literature, however,concerns have been raised about their susceptibility to inherent biases in the training data (observed data) such as popularity bias. SR models when trained on the biased training data may encounter perf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21892v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21892v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21892v1-abstract-full" style="display: none;"> Session-based recommendation (SR) models aim to recommend top-K items to a user, based on the user's behaviour during the current session. Several SR models are proposed in the literature, however,concerns have been raised about their susceptibility to inherent biases in the training data (observed data) such as popularity bias. SR models when trained on the biased training data may encounter performance challenges on out-of-distribution data in real-world scenarios. One way to mitigate popularity bias is counterfactual data augmentation. Compared to prior works that rely on generating data using SR models, we focus on utilizing the capabilities of state-of-the art diffusion models for generating counterfactual data. We propose a guided diffusion-based counterfactual augmentation framework for SR. Through a combination of offline and online experiments on a real-world and simulated dataset, respectively, we show that our approach performs significantly better than the baseline SR models and other state-of-the art augmentation frameworks. More importantly, our framework shows significant improvement on less popular target items, by achieving up to 20% gain in Recall and 13% gain in CTR on real-world and simulated datasets,respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21892v1-abstract-full').style.display = 'none'; document.getElementById('2410.21892v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19642">arXiv:2410.19642</a> <span> [<a href="https://arxiv.org/pdf/2410.19642">pdf</a>, <a href="https://arxiv.org/format/2410.19642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VARS: Vision-based Assessment of Risk in Security Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranav Gupta</a>, <a href="/search/cs?searchtype=author&query=Gohil%2C+P">Pratham Gohil</a>, <a href="/search/cs?searchtype=author&query=S%2C+S">Sridhar S</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19642v1-abstract-short" style="display: inline;"> The accurate prediction of danger levels in video content is critical for enhancing safety and security systems, particularly in environments where quick and reliable assessments are essential. In this study, we perform a comparative analysis of various machine learning and deep learning models to predict danger ratings in a custom dataset of 100 videos, each containing 50 frames, annotated with h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19642v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19642v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19642v1-abstract-full" style="display: none;"> The accurate prediction of danger levels in video content is critical for enhancing safety and security systems, particularly in environments where quick and reliable assessments are essential. In this study, we perform a comparative analysis of various machine learning and deep learning models to predict danger ratings in a custom dataset of 100 videos, each containing 50 frames, annotated with human-rated danger scores ranging from 0 to 10. The danger ratings are further classified into three categories: no alert (less than 7)and high alert (greater than equal to 7). Our evaluation covers classical machine learning models, such as Support Vector Machines, as well as Neural Networks, and transformer-based models. Model performance is assessed using standard metrics such as accuracy, F1-score, and mean absolute error (MAE), and the results are compared to identify the most robust approach. This research contributes to developing a more accurate and generalizable danger assessment framework for video-based risk detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19642v1-abstract-full').style.display = 'none'; document.getElementById('2410.19642v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12798">arXiv:2410.12798</a> <span> [<a href="https://arxiv.org/pdf/2410.12798">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Design of an Efficient Fan-Shaped Clustered Trust-Based Routing Model with QoS & Security-Aware Side-Chaining for IoV Deployments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Suryawanshi%2C+S+R">Sadaf Ravindra Suryawanshi</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Praveen Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12798v1-abstract-short" style="display: inline;"> The rapid expansion of Internet of Vehicles (IoV) deployments has necessitated the creation of efficient and secure routing models to manage the massive data traffic generated by interconnected devices & vehicles. For IoV deployments, we propose a novel fan-shaped trust-based routing model with Quality of Service (QoS) and security-aware side-chaining. Our method employs temporal levels of delay,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12798v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12798v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12798v1-abstract-full" style="display: none;"> The rapid expansion of Internet of Vehicles (IoV) deployments has necessitated the creation of efficient and secure routing models to manage the massive data traffic generated by interconnected devices & vehicles. For IoV deployments, we propose a novel fan-shaped trust-based routing model with Quality of Service (QoS) and security-aware side-chaining. Our method employs temporal levels of delay, throughput, Packet Delivery Ratio (PDR), and energy consumption to determine optimal routing paths, thereby ensuring efficient data transmissions. We employ the Bacterial Foraging Optimizer (BFO) algorithm to manage side-chains within the network, which dynamically adjusts side-chain configurations to optimize system performance. The technique of fan-shaped clustering is used to group nodes into efficient clusters, allowing for more efficient communication and resource utilization sets. Extensive experimentation and performance analysis are utilized to evaluate the proposed model. Existing blockchain-based security models have been significantly improved by our findings. Our model achieves a remarkable 9.5% reduction in delay, a 10.5% improvement in throughput, a 2.9% improvement in PDR, and a 4.5% reduction in energy consumption compared to alternative approaches. In addition, we evaluate the model's resistance to Sybil, Masquerading, and Flooding attacks, which are prevalent security threats for IoV deployments. Even under these attack scenarios, our model provides consistently higher QoS levels compared to existing solutions, ensuring uninterrupted and reliable data transmissions. In IoV deployments, the proposed routing model and side-chaining management approach have numerous applications and use-cases like Smart cities, industrial automation, healthcare systems, transportation networks, and environmental monitoring. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12798v1-abstract-full').style.display = 'none'; document.getElementById('2410.12798v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://ijisae.org/index.php/IJISAE/article/view/3770</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IJISAE, 12 (1) 108-20 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11211">arXiv:2410.11211</a> <span> [<a href="https://arxiv.org/pdf/2410.11211">pdf</a>, <a href="https://arxiv.org/format/2410.11211">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CVCP-Fusion: On Implicit Depth Estimation for 3D Bounding Box Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranav Gupta</a>, <a href="/search/cs?searchtype=author&query=Rengarajan%2C+R">Rishabh Rengarajan</a>, <a href="/search/cs?searchtype=author&query=Bankapur%2C+V">Viren Bankapur</a>, <a href="/search/cs?searchtype=author&query=Mannem%2C+V">Vedansh Mannem</a>, <a href="/search/cs?searchtype=author&query=Ahuja%2C+L">Lakshit Ahuja</a>, <a href="/search/cs?searchtype=author&query=Vijay%2C+S">Surya Vijay</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kevin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11211v2-abstract-short" style="display: inline;"> Combining LiDAR and Camera-view data has become a common approach for 3D Object Detection. However, previous approaches combine the two input streams at a point-level, throwing away semantic information derived from camera features. In this paper we propose Cross-View Center Point-Fusion, a state-of-the-art model to perform 3D object detection by combining camera and LiDAR-derived features in the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11211v2-abstract-full').style.display = 'inline'; document.getElementById('2410.11211v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11211v2-abstract-full" style="display: none;"> Combining LiDAR and Camera-view data has become a common approach for 3D Object Detection. However, previous approaches combine the two input streams at a point-level, throwing away semantic information derived from camera features. In this paper we propose Cross-View Center Point-Fusion, a state-of-the-art model to perform 3D object detection by combining camera and LiDAR-derived features in the BEV space to preserve semantic density from the camera stream while incorporating spacial data from the LiDAR stream. Our architecture utilizes aspects from previously established algorithms, Cross-View Transformers and CenterPoint, and runs their backbones in parallel, allowing efficient computation for real-time processing and application. In this paper we find that while an implicitly calculated depth-estimate may be sufficiently accurate in a 2D map-view representation, explicitly calculated geometric and spacial information is needed for precise bounding box prediction in the 3D world-view space. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11211v2-abstract-full').style.display = 'none'; document.getElementById('2410.11211v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 5 figures. arXiv admin note: text overlap with arXiv:2205.02833 by other authors</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Curieux Academic Journal Part 2 Issue 43 (2024), pp. 626-634 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10584">arXiv:2410.10584</a> <span> [<a href="https://arxiv.org/pdf/2410.10584">pdf</a>, <a href="https://arxiv.org/format/2410.10584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> STACKFEED: Structured Textual Actor-Critic Knowledge Base Editing with FeedBack </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+N">Naman Gupta</a>, <a href="/search/cs?searchtype=author&query=Kirtania%2C+S">Shashank Kirtania</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Kariya%2C+K">Krishna Kariya</a>, <a href="/search/cs?searchtype=author&query=Gulwani%2C+S">Sumit Gulwani</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+A">Arun Iyer</a>, <a href="/search/cs?searchtype=author&query=Parthasarathy%2C+S">Suresh Parthasarathy</a>, <a href="/search/cs?searchtype=author&query=Radhakrishna%2C+A">Arjun Radhakrishna</a>, <a href="/search/cs?searchtype=author&query=Rajamani%2C+S+K">Sriram K. Rajamani</a>, <a href="/search/cs?searchtype=author&query=Soares%2C+G">Gustavo Soares</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10584v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) often generate incorrect or outdated information, especially in low-resource settings or when dealing with private data. To address this, Retrieval-Augmented Generation (RAG) uses external knowledge bases (KBs), but these can also suffer from inaccuracies. We introduce STACKFEED, a novel Structured Textual Actor-Critic Knowledge base editing with FEEDback approach that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10584v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10584v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10584v1-abstract-full" style="display: none;"> Large Language Models (LLMs) often generate incorrect or outdated information, especially in low-resource settings or when dealing with private data. To address this, Retrieval-Augmented Generation (RAG) uses external knowledge bases (KBs), but these can also suffer from inaccuracies. We introduce STACKFEED, a novel Structured Textual Actor-Critic Knowledge base editing with FEEDback approach that iteratively refines the KB based on expert feedback using a multi-actor, centralized critic reinforcement learning framework. Each document is assigned to an actor, modeled as a ReACT agent, which performs structured edits based on document-specific targeted instructions from a centralized critic. Experimental results show that STACKFEED significantly improves KB quality and RAG system performance, enhancing accuracy by up to 8% over baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10584v1-abstract-full').style.display = 'none'; document.getElementById('2410.10584v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10270">arXiv:2410.10270</a> <span> [<a href="https://arxiv.org/pdf/2410.10270">pdf</a>, <a href="https://arxiv.org/format/2410.10270">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> QUIS: Question-guided Insights Generation for Automated Exploratory Data Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Manatkar%2C+A">Abhijit Manatkar</a>, <a href="/search/cs?searchtype=author&query=Akella%2C+A">Ashlesha Akella</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Parthivi Gupta</a>, <a href="/search/cs?searchtype=author&query=Narayanam%2C+K">Krishnasuri Narayanam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10270v3-abstract-short" style="display: inline;"> Discovering meaningful insights from a large dataset, known as Exploratory Data Analysis (EDA), is a challenging task that requires thorough exploration and analysis of the data. Automated Data Exploration (ADE) systems use goal-oriented methods with Large Language Models and Reinforcement Learning towards full automation. However, these methods require human involvement to anticipate goals that m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10270v3-abstract-full').style.display = 'inline'; document.getElementById('2410.10270v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10270v3-abstract-full" style="display: none;"> Discovering meaningful insights from a large dataset, known as Exploratory Data Analysis (EDA), is a challenging task that requires thorough exploration and analysis of the data. Automated Data Exploration (ADE) systems use goal-oriented methods with Large Language Models and Reinforcement Learning towards full automation. However, these methods require human involvement to anticipate goals that may limit insight extraction, while fully automated systems demand significant computational resources and retraining for new datasets. We introduce QUIS, a fully automated EDA system that operates in two stages: insight generation (ISGen) driven by question generation (QUGen). The QUGen module generates questions in iterations, refining them from previous iterations to enhance coverage without human intervention or manually curated examples. The ISGen module analyzes data to produce multiple relevant insights in response to each question, requiring no prior training and enabling QUIS to adapt to new datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10270v3-abstract-full').style.display = 'none'; document.getElementById('2410.10270v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for EMNLP 2024 Industry Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09927">arXiv:2410.09927</a> <span> [<a href="https://arxiv.org/pdf/2410.09927">pdf</a>, <a href="https://arxiv.org/format/2410.09927">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Lessons Learned: A Smart Campus Environment Using LoRaWAN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+H+P">Hari Prabhat Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09927v1-abstract-short" style="display: inline;"> The deployment of LoRaWAN (Long Range Wide Area Network) in dynamic environments, such as smart campuses, presents significant challenges in optimizing network parameters like spreading factor (SF), transmission power (TxPower), and managing mobility while ensuring reliable communication. In this paper, we first introduce the fundamental concepts of short-range and long-range communication protoco… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09927v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09927v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09927v1-abstract-full" style="display: none;"> The deployment of LoRaWAN (Long Range Wide Area Network) in dynamic environments, such as smart campuses, presents significant challenges in optimizing network parameters like spreading factor (SF), transmission power (TxPower), and managing mobility while ensuring reliable communication. In this paper, we first introduce the fundamental concepts of short-range and long-range communication protocols, emphasizing the specific requirements and advantages of LoRaWAN in various applications. Next, we discuss smart space solutions that integrate Edge, Fog, and Cloud computing, illustrating how these paradigms work in conjunction with both short-range and long-range communication protocols to enhance data processing and decision-making capabilities in real-time. We then present our insights and lessons learned from the deployment of LoRaWAN across the campus, focusing on the challenges encountered and the strategies employed to address them. This work provides a comprehensive overview of the methodologies applied, the results achieved, and the implications for future research and practical applications in IoT-enabled smart environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09927v1-abstract-full').style.display = 'none'; document.getElementById('2410.09927v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06743">arXiv:2410.06743</a> <span> [<a href="https://arxiv.org/pdf/2410.06743">pdf</a>, <a href="https://arxiv.org/format/2410.06743">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Utilizing Transfer Learning and pre-trained Models for Effective Forest Fire Detection: A Case Study of Uttarakhand </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+H+P">Hari Prabhat Gupta</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+R">Rahul Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06743v1-abstract-short" style="display: inline;"> Forest fires pose a significant threat to the environment, human life, and property. Early detection and response are crucial to mitigating the impact of these disasters. However, traditional forest fire detection methods are often hindered by our reliability on manual observation and satellite imagery with low spatial resolution. This paper emphasizes the role of transfer learning in enhancing fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06743v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06743v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06743v1-abstract-full" style="display: none;"> Forest fires pose a significant threat to the environment, human life, and property. Early detection and response are crucial to mitigating the impact of these disasters. However, traditional forest fire detection methods are often hindered by our reliability on manual observation and satellite imagery with low spatial resolution. This paper emphasizes the role of transfer learning in enhancing forest fire detection in India, particularly in overcoming data collection challenges and improving model accuracy across various regions. We compare traditional learning methods with transfer learning, focusing on the unique challenges posed by regional differences in terrain, climate, and vegetation. Transfer learning can be categorized into several types based on the similarity between the source and target tasks, as well as the type of knowledge transferred. One key method is utilizing pre-trained models for efficient transfer learning, which significantly reduces the need for extensive labeled data. We outline the transfer learning process, demonstrating how researchers can adapt pre-trained models like MobileNetV2 for specific tasks such as forest fire detection. Finally, we present experimental results from training and evaluating a deep learning model using the Uttarakhand forest fire dataset, showcasing the effectiveness of transfer learning in this context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06743v1-abstract-full').style.display = 'none'; document.getElementById('2410.06743v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00477">arXiv:2410.00477</a> <span> [<a href="https://arxiv.org/pdf/2410.00477">pdf</a>, <a href="https://arxiv.org/format/2410.00477">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ViDAS: Vision-based Danger Assessment and Scoring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranav Gupta</a>, <a href="/search/cs?searchtype=author&query=Krishnan%2C+A">Advith Krishnan</a>, <a href="/search/cs?searchtype=author&query=Nanda%2C+N">Naman Nanda</a>, <a href="/search/cs?searchtype=author&query=Eswar%2C+A">Ananth Eswar</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+D">Deeksha Agarwal</a>, <a href="/search/cs?searchtype=author&query=Gohil%2C+P">Pratham Gohil</a>, <a href="/search/cs?searchtype=author&query=Goel%2C+P">Pratyush Goel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00477v1-abstract-short" style="display: inline;"> We present a novel dataset aimed at advancing danger analysis and assessment by addressing the challenge of quantifying danger in video content and identifying how human-like a Large Language Model (LLM) evaluator is for the same. This is achieved by compiling a collection of 100 YouTube videos featuring various events. Each video is annotated by human participants who provided danger ratings on a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00477v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00477v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00477v1-abstract-full" style="display: none;"> We present a novel dataset aimed at advancing danger analysis and assessment by addressing the challenge of quantifying danger in video content and identifying how human-like a Large Language Model (LLM) evaluator is for the same. This is achieved by compiling a collection of 100 YouTube videos featuring various events. Each video is annotated by human participants who provided danger ratings on a scale from 0 (no danger to humans) to 10 (life-threatening), with precise timestamps indicating moments of heightened danger. Additionally, we leverage LLMs to independently assess the danger levels in these videos using video summaries. We introduce Mean Squared Error (MSE) scores for multimodal meta-evaluation of the alignment between human and LLM danger assessments. Our dataset not only contributes a new resource for danger assessment in video content but also demonstrates the potential of LLMs in achieving human-like evaluations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00477v1-abstract-full').style.display = 'none'; document.getElementById('2410.00477v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14043">arXiv:2409.14043</a> <span> [<a href="https://arxiv.org/pdf/2409.14043">pdf</a>, <a href="https://arxiv.org/format/2409.14043">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/CONECCT62155.2024.10677303">10.1109/CONECCT62155.2024.10677303 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ECHO: Environmental Sound Classification with Hierarchical Ontology-guided Semi-Supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranav Gupta</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+R">Raunak Sharma</a>, <a href="/search/cs?searchtype=author&query=Kumari%2C+R">Rashmi Kumari</a>, <a href="/search/cs?searchtype=author&query=Aditya%2C+S+K">Sri Krishna Aditya</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+S">Shwetank Choudhary</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+S">Sumit Kumar</a>, <a href="/search/cs?searchtype=author&query=M%2C+K">Kanchana M</a>, <a href="/search/cs?searchtype=author&query=R%2C+T">Thilagavathy R</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14043v1-abstract-short" style="display: inline;"> Environment Sound Classification has been a well-studied research problem in the field of signal processing and up till now more focus has been laid on fully supervised approaches. Over the last few years, focus has moved towards semi-supervised methods which concentrate on the utilization of unlabeled data, and self-supervised methods which learn the intermediate representation through pretext ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14043v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14043v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14043v1-abstract-full" style="display: none;"> Environment Sound Classification has been a well-studied research problem in the field of signal processing and up till now more focus has been laid on fully supervised approaches. Over the last few years, focus has moved towards semi-supervised methods which concentrate on the utilization of unlabeled data, and self-supervised methods which learn the intermediate representation through pretext task or contrastive learning. However, both approaches require a vast amount of unlabelled data to improve performance. In this work, we propose a novel framework called Environmental Sound Classification with Hierarchical Ontology-guided semi-supervised Learning (ECHO) that utilizes label ontology-based hierarchy to learn semantic representation by defining a novel pretext task. In the pretext task, the model tries to predict coarse labels defined by the Large Language Model (LLM) based on ground truth label ontology. The trained model is further fine-tuned in a supervised way to predict the actual task. Our proposed novel semi-supervised framework achieves an accuracy improvement in the range of 1\% to 8\% over baseline systems across three datasets namely UrbanSound8K, ESC-10, and ESC-50. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14043v1-abstract-full').style.display = 'none'; document.getElementById('2409.14043v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE CONECCT 2024, Signal Processing and Pattern Recognition, Environmental Sound Classification, ESC</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08450">arXiv:2409.08450</a> <span> [<a href="https://arxiv.org/pdf/2409.08450">pdf</a>, <a href="https://arxiv.org/format/2409.08450">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Inter Observer Variability Assessment through Ordered Weighted Belief Divergence Measure in MAGDM Application to the Ensemble Classifier Feature Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pragya Gupta</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+D">Debjani Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Guha%2C+D">Debashree Guha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08450v1-abstract-short" style="display: inline;"> A large number of multi-attribute group decisionmaking (MAGDM) have been widely introduced to obtain consensus results. However, most of the methodologies ignore the conflict among the experts opinions and only consider equal or variable priorities of them. Therefore, this study aims to propose an Evidential MAGDM method by assessing the inter-observational variability and handling uncertainty tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08450v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08450v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08450v1-abstract-full" style="display: none;"> A large number of multi-attribute group decisionmaking (MAGDM) have been widely introduced to obtain consensus results. However, most of the methodologies ignore the conflict among the experts opinions and only consider equal or variable priorities of them. Therefore, this study aims to propose an Evidential MAGDM method by assessing the inter-observational variability and handling uncertainty that emerges between the experts. The proposed framework has fourfold contributions. First, the basic probability assignment (BPA) generation method is introduced to consider the inherent characteristics of each alternative by computing the degree of belief. Second, the ordered weighted belief and plausibility measure is constructed to capture the overall intrinsic information of the alternative by assessing the inter-observational variability and addressing the conflicts emerging between the group of experts. An ordered weighted belief divergence measure is constructed to acquire the weighted support for each group of experts to obtain the final preference relationship. Finally, we have shown an illustrative example of the proposed Evidential MAGDM framework. Further, we have analyzed the interpretation of Evidential MAGDM in the real-world application for ensemble classifier feature fusion to diagnose retinal disorders using optical coherence tomography images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08450v1-abstract-full').style.display = 'none'; document.getElementById('2409.08450v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03227">arXiv:2409.03227</a> <span> [<a href="https://arxiv.org/pdf/2409.03227">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.13140/RG.2.2.10125.55520">10.13140/RG.2.2.10125.55520 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Memristors based Computation and Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prashant Gupta</a>, <a href="/search/cs?searchtype=author&query=Jennifer%2C+P">Priscilla Jennifer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03227v1-abstract-short" style="display: inline;"> Memristor has been identified as the fourth fundamental circuit element by Dr. Leon Chua in 1971 and since then it has gathered a lot of interest because of its non-volatility and are considered as a viable solution to the beyond CMOS era computation. Recently, memristor have been used to perform basic logic operations like AND, OR, NAND, NOR, XOR etc. and are also used in applications like Dot Pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03227v1-abstract-full').style.display = 'inline'; document.getElementById('2409.03227v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03227v1-abstract-full" style="display: none;"> Memristor has been identified as the fourth fundamental circuit element by Dr. Leon Chua in 1971 and since then it has gathered a lot of interest because of its non-volatility and are considered as a viable solution to the beyond CMOS era computation. Recently, memristor have been used to perform basic logic operations like AND, OR, NAND, NOR, XOR etc. and are also used in applications like Dot Product Engine, Convolution Neural Networks etc. This paper presents a new behavioural model of memristor then using it to build a 32-bit ripple carry adder. The paper later compares the area, power and time delay of the 32 bit Ripple Carry Adder using memristor with the 45nm CMOS technology and highlights its advantages and pitfalls. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03227v1-abstract-full').style.display = 'none'; document.getElementById('2409.03227v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01754">arXiv:2409.01754</a> <span> [<a href="https://arxiv.org/pdf/2409.01754">pdf</a>, <a href="https://arxiv.org/format/2409.01754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Empirical evidence of Large Language Model's influence on human spoken communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yakura%2C+H">Hiromu Yakura</a>, <a href="/search/cs?searchtype=author&query=Lopez-Lopez%2C+E">Ezequiel Lopez-Lopez</a>, <a href="/search/cs?searchtype=author&query=Brinkmann%2C+L">Levin Brinkmann</a>, <a href="/search/cs?searchtype=author&query=Serna%2C+I">Ignacio Serna</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prateek Gupta</a>, <a href="/search/cs?searchtype=author&query=Rahwan%2C+I">Iyad Rahwan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01754v1-abstract-short" style="display: inline;"> Artificial Intelligence (AI) agents now interact with billions of humans in natural language, thanks to advances in Large Language Models (LLMs) like ChatGPT. This raises the question of whether AI has the potential to shape a fundamental aspect of human culture: the way we speak. Recent analyses revealed that scientific publications already exhibit evidence of AI-specific language. But this evide… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01754v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01754v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01754v1-abstract-full" style="display: none;"> Artificial Intelligence (AI) agents now interact with billions of humans in natural language, thanks to advances in Large Language Models (LLMs) like ChatGPT. This raises the question of whether AI has the potential to shape a fundamental aspect of human culture: the way we speak. Recent analyses revealed that scientific publications already exhibit evidence of AI-specific language. But this evidence is inconclusive, since scientists may simply be using AI to copy-edit their writing. To explore whether AI has influenced human spoken communication, we transcribed and analyzed about 280,000 English-language videos of presentations, talks, and speeches from more than 20,000 YouTube channels of academic institutions. We find a significant shift in the trend of word usage specific to words distinctively associated with ChatGPT following its release. These findings provide the first empirical evidence that humans increasingly imitate LLMs in their spoken language. Our results raise societal and policy-relevant concerns about the potential of AI to unintentionally reduce linguistic diversity, or to be deliberately misused for mass manipulation. They also highlight the need for further investigation into the feedback loops between machine behavior and human culture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01754v1-abstract-full').style.display = 'none'; document.getElementById('2409.01754v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00718">arXiv:2409.00718</a> <span> [<a href="https://arxiv.org/pdf/2409.00718">pdf</a>, <a href="https://arxiv.org/format/2409.00718">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multiscale Color Guided Attention Ensemble Classifier for Age-Related Macular Degeneration using Concurrent Fundus and Optical Coherence Tomography Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pragya Gupta</a>, <a href="/search/cs?searchtype=author&query=Mandal%2C+S">Subhamoy Mandal</a>, <a href="/search/cs?searchtype=author&query=Guha%2C+D">Debashree Guha</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+D">Debjani Chakraborty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00718v1-abstract-short" style="display: inline;"> Automatic diagnosis techniques have evolved to identify age-related macular degeneration (AMD) by employing single modality Fundus images or optical coherence tomography (OCT). To classify ocular diseases, fundus and OCT images are the most crucial imaging modalities used in the clinical setting. Most deep learning-based techniques are established on a single imaging modality, which contemplates t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00718v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00718v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00718v1-abstract-full" style="display: none;"> Automatic diagnosis techniques have evolved to identify age-related macular degeneration (AMD) by employing single modality Fundus images or optical coherence tomography (OCT). To classify ocular diseases, fundus and OCT images are the most crucial imaging modalities used in the clinical setting. Most deep learning-based techniques are established on a single imaging modality, which contemplates the ocular disorders to a specific extent and disregards other modality that comprises exhaustive information among distinct imaging modalities. This paper proposes a modality-specific multiscale color space embedding integrated with the attention mechanism based on transfer learning for classification (MCGAEc), which can efficiently extract the distinct modality information at various scales using the distinct color spaces. In this work, we first introduce the modality-specific multiscale color space encoder model, which includes diverse feature representations by integrating distinct characteristic color spaces on a multiscale into a unified framework. The extracted features from the prior encoder module are incorporated with the attention mechanism to extract the global features representation, which is integrated with the prior extracted features and transferred to the random forest classifier for the classification of AMD. To analyze the performance of the proposed MCGAEc method, a publicly available multi-modality dataset from Project Macula for AMD is utilized and compared with the existing models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00718v1-abstract-full').style.display = 'none'; document.getElementById('2409.00718v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27th International Conference on Pattern Recognition (ICPR) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09253">arXiv:2408.09253</a> <span> [<a href="https://arxiv.org/pdf/2408.09253">pdf</a>, <a href="https://arxiv.org/format/2408.09253">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning Compensated Model Predictive Control for Off-road Driving on Unknown Deformable Terrain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prakhar Gupta</a>, <a href="/search/cs?searchtype=author&query=Smereka%2C+J+M">Jonathon M. Smereka</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+Y">Yunyi Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09253v1-abstract-short" style="display: inline;"> This study presents an Actor-Critic reinforcement learning Compensated Model Predictive Controller (AC2MPC) designed for high-speed, off-road autonomous driving on deformable terrains. Addressing the difficulty of modeling unknown tire-terrain interaction and ensuring real-time control feasibility and performance, this framework integrates deep reinforcement learning with a model predictive contro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09253v1-abstract-full').style.display = 'inline'; document.getElementById('2408.09253v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09253v1-abstract-full" style="display: none;"> This study presents an Actor-Critic reinforcement learning Compensated Model Predictive Controller (AC2MPC) designed for high-speed, off-road autonomous driving on deformable terrains. Addressing the difficulty of modeling unknown tire-terrain interaction and ensuring real-time control feasibility and performance, this framework integrates deep reinforcement learning with a model predictive controller to manage unmodeled nonlinear dynamics. We evaluate the controller framework over constant and varying velocity profiles using high-fidelity simulator Project Chrono. Our findings demonstrate that our controller statistically outperforms standalone model-based and learning-based controllers over three unknown terrains that represent sandy deformable track, sandy and rocky track and cohesive clay-like deformable soil track. Despite varied and previously unseen terrain characteristics, this framework generalized well enough to track longitudinal reference speeds with the least error. Furthermore, this framework required significantly less training data compared to purely learning based controller, converging in fewer steps while delivering better performance. Even when under-trained, this controller outperformed the standalone controllers, highlighting its potential for safer and more efficient real-world deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09253v1-abstract-full').style.display = 'none'; document.getElementById('2408.09253v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE Transactions on Intelligent Vehicles as a Regular Paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03837">arXiv:2408.03837</a> <span> [<a href="https://arxiv.org/pdf/2408.03837">pdf</a>, <a href="https://arxiv.org/format/2408.03837">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> WalledEval: A Comprehensive Safety Evaluation Toolkit for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prannaya Gupta</a>, <a href="/search/cs?searchtype=author&query=Yau%2C+L+Q">Le Qi Yau</a>, <a href="/search/cs?searchtype=author&query=Low%2C+H+H">Hao Han Low</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+I">I-Shiang Lee</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+H+M">Hugo Maximus Lim</a>, <a href="/search/cs?searchtype=author&query=Teoh%2C+Y+X">Yu Xin Teoh</a>, <a href="/search/cs?searchtype=author&query=Koh%2C+J+H">Jia Hng Koh</a>, <a href="/search/cs?searchtype=author&query=Liew%2C+D+W">Dar Win Liew</a>, <a href="/search/cs?searchtype=author&query=Bhardwaj%2C+R">Rishabh Bhardwaj</a>, <a href="/search/cs?searchtype=author&query=Bhardwaj%2C+R">Rajat Bhardwaj</a>, <a href="/search/cs?searchtype=author&query=Poria%2C+S">Soujanya Poria</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03837v3-abstract-short" style="display: inline;"> WalledEval is a comprehensive AI safety testing toolkit designed to evaluate large language models (LLMs). It accommodates a diverse range of models, including both open-weight and API-based ones, and features over 35 safety benchmarks covering areas such as multilingual safety, exaggerated safety, and prompt injections. The framework supports both LLM and judge benchmarking and incorporates custo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03837v3-abstract-full').style.display = 'inline'; document.getElementById('2408.03837v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03837v3-abstract-full" style="display: none;"> WalledEval is a comprehensive AI safety testing toolkit designed to evaluate large language models (LLMs). It accommodates a diverse range of models, including both open-weight and API-based ones, and features over 35 safety benchmarks covering areas such as multilingual safety, exaggerated safety, and prompt injections. The framework supports both LLM and judge benchmarking and incorporates custom mutators to test safety against various text-style mutations, such as future tense and paraphrasing. Additionally, WalledEval introduces WalledGuard, a new, small, and performant content moderation tool, and two datasets: SGXSTest and HIXSTest, which serve as benchmarks for assessing the exaggerated safety of LLMs and judges in cultural contexts. We make WalledEval publicly available at https://github.com/walledai/walledeval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03837v3-abstract-full').style.display = 'none'; document.getElementById('2408.03837v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18451">arXiv:2407.18451</a> <span> [<a href="https://arxiv.org/pdf/2407.18451">pdf</a>, <a href="https://arxiv.org/format/2407.18451">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Gaussian Lane Keeping: A Robust Prediction Baseline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Isele%2C+D">David Isele</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Piyush Gupta</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xinyi Liu</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+S">Sangjae Bae</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18451v1-abstract-short" style="display: inline;"> Predicting agents' behavior for vehicles and pedestrians is challenging due to a myriad of factors including the uncertainty attached to different intentions, inter-agent interactions, traffic (environment) rules, individual inclinations, and agent dynamics. Consequently, a plethora of neural network-driven prediction models have been introduced in the literature to encompass these intricacies to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18451v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18451v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18451v1-abstract-full" style="display: none;"> Predicting agents' behavior for vehicles and pedestrians is challenging due to a myriad of factors including the uncertainty attached to different intentions, inter-agent interactions, traffic (environment) rules, individual inclinations, and agent dynamics. Consequently, a plethora of neural network-driven prediction models have been introduced in the literature to encompass these intricacies to accurately predict the agent behavior. Nevertheless, many of these approaches falter when confronted with scenarios beyond their training datasets, and lack interpretability, raising concerns about their suitability for real-world applications such as autonomous driving. Moreover, these models frequently demand additional training, substantial computational resources, or specific input features necessitating extensive implementation endeavors. In response, we propose Gaussian Lane Keeping (GLK), a robust prediction method for autonomous vehicles that can provide a solid baseline for comparison when developing new algorithms and a sanity check for real-world deployment. We provide several extensions to the GLK model, evaluate it on the CitySim dataset, and show that it outperforms the neural-network based predictions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18451v1-abstract-full').style.display = 'none'; document.getElementById('2407.18451v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14645">arXiv:2407.14645</a> <span> [<a href="https://arxiv.org/pdf/2407.14645">pdf</a>, <a href="https://arxiv.org/format/2407.14645">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Performance Modeling and Workload Analysis of Distributed Large Language Model Training and Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kundu%2C+J">Joyjit Kundu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wenzhe Guo</a>, <a href="/search/cs?searchtype=author&query=BanaGozar%2C+A">Ali BanaGozar</a>, <a href="/search/cs?searchtype=author&query=De+Alwis%2C+U">Udari De Alwis</a>, <a href="/search/cs?searchtype=author&query=Sengupta%2C+S">Sourav Sengupta</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Puneet Gupta</a>, <a href="/search/cs?searchtype=author&query=Mallik%2C+A">Arindam Mallik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14645v1-abstract-short" style="display: inline;"> Aligning future system design with the ever-increasing compute needs of large language models (LLMs) is undoubtedly an important problem in today's world. Here, we propose a general performance modeling methodology and workload analysis of distributed LLM training and inference through an analytical framework that accurately considers compute, memory sub-system, network, and various parallelizatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14645v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14645v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14645v1-abstract-full" style="display: none;"> Aligning future system design with the ever-increasing compute needs of large language models (LLMs) is undoubtedly an important problem in today's world. Here, we propose a general performance modeling methodology and workload analysis of distributed LLM training and inference through an analytical framework that accurately considers compute, memory sub-system, network, and various parallelization strategies (model parallel, data parallel, pipeline parallel, and sequence parallel). We validate our performance predictions with published data from literature and relevant industry vendors (e.g., NVIDIA). For distributed training, we investigate the memory footprint of LLMs for different activation re-computation methods, dissect the key factors behind the massive performance gain from A100 to B200 ($\sim$ 35x speed-up closely following NVIDIA's scaling trend), and further run a design space exploration at different technology nodes (12 nm to 1 nm) to study the impact of logic, memory, and network scaling on the performance. For inference, we analyze the compute versus memory boundedness of different operations at a matrix-multiply level for different GPU systems and further explore the impact of DRAM memory technology scaling on inference latency. Utilizing our modeling framework, we reveal the evolution of performance bottlenecks for both LLM training and inference with technology scaling, thus, providing insights to design future systems for LLM training and inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14645v1-abstract-full').style.display = 'none'; document.getElementById('2407.14645v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08876">arXiv:2407.08876</a> <span> [<a href="https://arxiv.org/pdf/2407.08876">pdf</a>, <a href="https://arxiv.org/format/2407.08876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> DegustaBot: Zero-Shot Visual Preference Estimation for Personalized Multi-Object Rearrangement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Newman%2C+B+A">Benjamin A. Newman</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pranay Gupta</a>, <a href="/search/cs?searchtype=author&query=Kitani%2C+K">Kris Kitani</a>, <a href="/search/cs?searchtype=author&query=Bisk%2C+Y">Yonatan Bisk</a>, <a href="/search/cs?searchtype=author&query=Admoni%2C+H">Henny Admoni</a>, <a href="/search/cs?searchtype=author&query=Paxton%2C+C">Chris Paxton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08876v1-abstract-short" style="display: inline;"> De gustibus non est disputandum ("there is no accounting for others' tastes") is a common Latin maxim describing how many solutions in life are determined by people's personal preferences. Many household tasks, in particular, can only be considered fully successful when they account for personal preferences such as the visual aesthetic of the scene. For example, setting a table could be optimized… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08876v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08876v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08876v1-abstract-full" style="display: none;"> De gustibus non est disputandum ("there is no accounting for others' tastes") is a common Latin maxim describing how many solutions in life are determined by people's personal preferences. Many household tasks, in particular, can only be considered fully successful when they account for personal preferences such as the visual aesthetic of the scene. For example, setting a table could be optimized by arranging utensils according to traditional rules of Western table setting decorum, without considering the color, shape, or material of each object, but this may not be a completely satisfying solution for a given person. Toward this end, we present DegustaBot, an algorithm for visual preference learning that solves household multi-object rearrangement tasks according to personal preference. To do this, we use internet-scale pre-trained vision-and-language foundation models (VLMs) with novel zero-shot visual prompting techniques. To evaluate our method, we collect a large dataset of naturalistic personal preferences in a simulated table-setting task, and conduct a user study in order to develop two novel metrics for determining success based on personal preference. This is a challenging problem and we find that 50% of our model's predictions are likely to be found acceptable by at least 20% of people. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08876v1-abstract-full').style.display = 'none'; document.getElementById('2407.08876v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02514">arXiv:2407.02514</a> <span> [<a href="https://arxiv.org/pdf/2407.02514">pdf</a>, <a href="https://arxiv.org/format/2407.02514">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LOGIC-LM++: Multi-Step Refinement for Symbolic Formulations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kirtania%2C+S">Shashank Kirtania</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Radhakirshna%2C+A">Arjun Radhakirshna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02514v3-abstract-short" style="display: inline;"> In this paper we examine the limitations of Large Language Models (LLMs) for complex reasoning tasks. Although recent works have started to employ formal languages as an intermediate representation for reasoning tasks, they often face challenges in accurately generating and refining these formal specifications to ensure correctness. To address these issues, this paper proposes Logic-LM++, an impro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02514v3-abstract-full').style.display = 'inline'; document.getElementById('2407.02514v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02514v3-abstract-full" style="display: none;"> In this paper we examine the limitations of Large Language Models (LLMs) for complex reasoning tasks. Although recent works have started to employ formal languages as an intermediate representation for reasoning tasks, they often face challenges in accurately generating and refining these formal specifications to ensure correctness. To address these issues, this paper proposes Logic-LM++, an improvement on Logic-LM . It uses the ability of LLMs to do pairwise comparisons, allowing the evaluation of the refinements suggested by the LLM. The paper demonstrates that Logic-LM++ outperforms Logic-LM and other contemporary techniques across natural language reasoning tasks on three datasets, FOLIO, ProofWriter and AR-LSAT, with an average improvement of 18.5% on standard prompting, 12.3% on chain of thought prompting and 5% on Logic-LM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02514v3-abstract-full').style.display = 'none'; document.getElementById('2407.02514v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19580">arXiv:2406.19580</a> <span> [<a href="https://arxiv.org/pdf/2406.19580">pdf</a>, <a href="https://arxiv.org/format/2406.19580">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FRED: Flexible REduction-Distribution Interconnect and Communication Implementation for Wafer-Scale Distributed Training of DNN Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rashidi%2C+S">Saeed Rashidi</a>, <a href="/search/cs?searchtype=author&query=Won%2C+W">William Won</a>, <a href="/search/cs?searchtype=author&query=Srinivasan%2C+S">Sudarshan Srinivasan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Puneet Gupta</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+T">Tushar Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19580v1-abstract-short" style="display: inline;"> Distributed Deep Neural Network (DNN) training is a technique to reduce the training overhead by distributing the training tasks into multiple accelerators, according to a parallelization strategy. However, high-performance compute and interconnects are needed for maximum speed-up and linear scaling of the system. Wafer-scale systems are a promising technology that allows for tightly integrating h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19580v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19580v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19580v1-abstract-full" style="display: none;"> Distributed Deep Neural Network (DNN) training is a technique to reduce the training overhead by distributing the training tasks into multiple accelerators, according to a parallelization strategy. However, high-performance compute and interconnects are needed for maximum speed-up and linear scaling of the system. Wafer-scale systems are a promising technology that allows for tightly integrating high-end accelerators with high-speed wafer-scale interconnects, making it an attractive platform for distributed training. However, the wafer-scale interconnect should offer high performance and flexibility for various parallelization strategies to enable maximum optimizations for compute and memory usage. In this paper, we propose FRED, a wafer-scale interconnect that is tailored for the high-BW requirements of wafer-scale networks and can efficiently execute communication patterns of different parallelization strategies. Furthermore, FRED supports in-switch collective communication execution that reduces the network traffic by approximately 2X. Our results show that FRED can improve the average end-to-end training time of ResNet-152, Transformer-17B, GPT-3, and Transformer-1T by 1.76X, 1.87X, 1.34X, and 1.4X, respectively when compared to a baseline waferscale 2D-Mesh fabric. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19580v1-abstract-full').style.display = 'none'; document.getElementById('2406.19580v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19545">arXiv:2406.19545</a> <span> [<a href="https://arxiv.org/pdf/2406.19545">pdf</a>, <a href="https://arxiv.org/format/2406.19545">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Machine-Generated Rationales to Facilitate Social Meaning Detection in Conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dutt%2C+R">Ritam Dutt</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhen Wu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+K">Kelly Shi</a>, <a href="/search/cs?searchtype=author&query=Sheth%2C+D">Divyanshu Sheth</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prakhar Gupta</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C+P">Carolyn Penstein Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19545v1-abstract-short" style="display: inline;"> We present a generalizable classification approach that leverages Large Language Models (LLMs) to facilitate the detection of implicitly encoded social meaning in conversations. We design a multi-faceted prompt to extract a textual explanation of the reasoning that connects visible cues to underlying social meanings. These extracted explanations or rationales serve as augmentations to the conversa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19545v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19545v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19545v1-abstract-full" style="display: none;"> We present a generalizable classification approach that leverages Large Language Models (LLMs) to facilitate the detection of implicitly encoded social meaning in conversations. We design a multi-faceted prompt to extract a textual explanation of the reasoning that connects visible cues to underlying social meanings. These extracted explanations or rationales serve as augmentations to the conversational text to facilitate dialogue understanding and transfer. Our empirical results over 2,340 experimental settings demonstrate the significant positive impact of adding these rationales. Our findings hold true for in-domain classification, zero-shot, and few-shot domain transfer for two different social meaning detection tasks, each spanning two different corpora. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19545v1-abstract-full').style.display = 'none'; document.getElementById('2406.19545v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at The Proceedings of the Association for Computational Linguistics, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14236">arXiv:2406.14236</a> <span> [<a href="https://arxiv.org/pdf/2406.14236">pdf</a>, <a href="https://arxiv.org/format/2406.14236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> NAC-QFL: Noise Aware Clustered Quantum Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sahu%2C+H">Himanshu Sahu</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+H+P">Hari Prabhat Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14236v1-abstract-short" style="display: inline;"> Recent advancements in quantum computing, alongside successful deployments of quantum communication, hold promises for revolutionizing mobile networks. While Quantum Machine Learning (QML) presents opportunities, it contends with challenges like noise in quantum devices and scalability. Furthermore, the high cost of quantum communication constrains the practical application of QML in real-world sc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14236v1-abstract-full').style.display = 'inline'; document.getElementById('2406.14236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14236v1-abstract-full" style="display: none;"> Recent advancements in quantum computing, alongside successful deployments of quantum communication, hold promises for revolutionizing mobile networks. While Quantum Machine Learning (QML) presents opportunities, it contends with challenges like noise in quantum devices and scalability. Furthermore, the high cost of quantum communication constrains the practical application of QML in real-world scenarios. This paper introduces a noise-aware clustered quantum federated learning system that addresses noise mitigation, limited quantum device capacity, and high quantum communication costs in distributed QML. It employs noise modelling and clustering to select devices with minimal noise and distribute QML tasks efficiently. Using circuit partitioning to deploy smaller models on low-noise devices and aggregating similar devices, the system enhances distributed QML performance and reduces communication costs. Leveraging circuit cutting, QML techniques are more effective for smaller circuit sizes and fidelity. We conduct experimental evaluations to assess the performance of the proposed system. Additionally, we introduce a noisy dataset for QML to demonstrate the impact of noise on proposed accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14236v1-abstract-full').style.display = 'none'; document.getElementById('2406.14236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17469">arXiv:2405.17469</a> <span> [<a href="https://arxiv.org/pdf/2405.17469">pdf</a>, <a href="https://arxiv.org/format/2405.17469">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> A Dataset for Research on Water Sustainability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P+S">Pranjol Sen Gupta</a>, <a href="/search/cs?searchtype=author&query=Hossen%2C+M+R">Md Rajib Hossen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pengfei Li</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+S">Shaolei Ren</a>, <a href="/search/cs?searchtype=author&query=Islam%2C+M+A">Mohammad A. Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17469v1-abstract-short" style="display: inline;"> Freshwater scarcity is a global problem that requires collective efforts across all industry sectors. Nevertheless, a lack of access to operational water footprint data bars many applications from exploring optimization opportunities hidden within the temporal and spatial variations. To break this barrier into research in water sustainability, we build a dataset for operation direct water usage in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17469v1-abstract-full').style.display = 'inline'; document.getElementById('2405.17469v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17469v1-abstract-full" style="display: none;"> Freshwater scarcity is a global problem that requires collective efforts across all industry sectors. Nevertheless, a lack of access to operational water footprint data bars many applications from exploring optimization opportunities hidden within the temporal and spatial variations. To break this barrier into research in water sustainability, we build a dataset for operation direct water usage in the cooling systems and indirect water embedded in electricity generation. Our dataset consists of the hourly water efficiency of major U.S. cities and states from 2019 to 2023. We also offer cooling system models that capture the impact of weather on water efficiency. We present a preliminary analysis of our dataset and discuss three potential applications that can benefit from it. Our dataset is publicly available at Open Science Framework (OSF) <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17469v1-abstract-full').style.display = 'none'; document.getElementById('2405.17469v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM e-Energy 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13009">arXiv:2405.13009</a> <span> [<a href="https://arxiv.org/pdf/2405.13009">pdf</a>, <a href="https://arxiv.org/format/2405.13009">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MetaReflection: Learning Instructions for Language Agents using Past Reflections </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Priyanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Kirtania%2C+S">Shashank Kirtania</a>, <a href="/search/cs?searchtype=author&query=Singha%2C+A">Ananya Singha</a>, <a href="/search/cs?searchtype=author&query=Gulwani%2C+S">Sumit Gulwani</a>, <a href="/search/cs?searchtype=author&query=Radhakrishna%2C+A">Arjun Radhakrishna</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+S">Sherry Shi</a>, <a href="/search/cs?searchtype=author&query=Soares%2C+G">Gustavo Soares</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13009v2-abstract-short" style="display: inline;"> The popularity of Large Language Models (LLMs) have unleashed a new age ofLanguage Agents for solving a diverse range of tasks. While contemporary frontier LLMs are capable enough to power reasonably good Language agents, the closed-API model makes it hard to improve in cases they perform sub-optimally. To address this, recent works have explored ways to improve their performance using techniques… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13009v2-abstract-full').style.display = 'inline'; document.getElementById('2405.13009v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13009v2-abstract-full" style="display: none;"> The popularity of Large Language Models (LLMs) have unleashed a new age ofLanguage Agents for solving a diverse range of tasks. While contemporary frontier LLMs are capable enough to power reasonably good Language agents, the closed-API model makes it hard to improve in cases they perform sub-optimally. To address this, recent works have explored ways to improve their performance using techniques like self-reflection and prompt optimization. Unfortunately, techniques like self-reflection can be used only in an online setup, while contemporary prompt optimization techniques are designed and tested to work on simple tasks. To this end, we introduce MetaReflection, a novel offline reinforcement learning technique that enhances the performance of Language Agents by augmenting a semantic memory based on experiential learnings from past trials. We demonstrate the efficacy of MetaReflection by evaluating across multiple domains, including complex logical reasoning, biomedical semantic similarity, open world question answering, and vulnerability threat detection, in Infrastructure-as-Code, spanning different agent designs. MetaReflection boosts Language agents' performance by 4% to 16.82% over the raw GPT-4 baseline and performs on par with existing state-of-the-art prompt optimization techniques while requiring fewer LLM calls. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13009v2-abstract-full').style.display = 'none'; document.getElementById('2405.13009v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We release our experimental code at: https://aka.ms/metareflection-code</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11651">arXiv:2405.11651</a> <span> [<a href="https://arxiv.org/pdf/2405.11651">pdf</a>, <a href="https://arxiv.org/format/2405.11651">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Movie Revenue Prediction using Machine Learning Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Udandarao%2C+V">Vikranth Udandarao</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pratyush Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11651v1-abstract-short" style="display: inline;"> In the contemporary film industry, accurately predicting a movie's earnings is paramount for maximizing profitability. This project aims to develop a machine learning model for predicting movie earnings based on input features like the movie name, the MPAA rating of the movie, the genre of the movie, the year of release of the movie, the IMDb Rating, the votes by the watchers, the director, the wr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11651v1-abstract-full').style.display = 'inline'; document.getElementById('2405.11651v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11651v1-abstract-full" style="display: none;"> In the contemporary film industry, accurately predicting a movie's earnings is paramount for maximizing profitability. This project aims to develop a machine learning model for predicting movie earnings based on input features like the movie name, the MPAA rating of the movie, the genre of the movie, the year of release of the movie, the IMDb Rating, the votes by the watchers, the director, the writer and the leading cast, the country of production of the movie, the budget of the movie, the production company and the runtime of the movie. Through a structured methodology involving data collection, preprocessing, analysis, model selection, evaluation, and improvement, a robust predictive model is constructed. Linear Regression, Decision Trees, Random Forest Regression, Bagging, XGBoosting and Gradient Boosting have been trained and tested. Model improvement strategies include hyperparameter tuning and cross-validation. The resulting model offers promising accuracy and generalization, facilitating informed decision-making in the film industry to maximize profits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11651v1-abstract-full').style.display = 'none'; document.getElementById('2405.11651v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">for associated code base, see https://github.com/Vikranth3140/Movie-Revenue-Prediction</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01616">arXiv:2405.01616</a> <span> [<a href="https://arxiv.org/pdf/2405.01616">pdf</a>, <a href="https://arxiv.org/format/2405.01616">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generative Active Learning for the Search of Small-molecule Protein Binders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Korablyov%2C+M">Maksym Korablyov</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Cheng-Hao Liu</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+M">Moksh Jain</a>, <a href="/search/cs?searchtype=author&query=van+der+Sloot%2C+A+M">Almer M. van der Sloot</a>, <a href="/search/cs?searchtype=author&query=Jolicoeur%2C+E">Eric Jolicoeur</a>, <a href="/search/cs?searchtype=author&query=Ruediger%2C+E">Edward Ruediger</a>, <a href="/search/cs?searchtype=author&query=Nica%2C+A+C">Andrei Cristian Nica</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+E">Emmanuel Bengio</a>, <a href="/search/cs?searchtype=author&query=Lapchevskyi%2C+K">Kostiantyn Lapchevskyi</a>, <a href="/search/cs?searchtype=author&query=St-Cyr%2C+D">Daniel St-Cyr</a>, <a href="/search/cs?searchtype=author&query=Schuetz%2C+D+A">Doris Alexandra Schuetz</a>, <a href="/search/cs?searchtype=author&query=Butoi%2C+V+I">Victor Ion Butoi</a>, <a href="/search/cs?searchtype=author&query=Rector-Brooks%2C+J">Jarrid Rector-Brooks</a>, <a href="/search/cs?searchtype=author&query=Blackburn%2C+S">Simon Blackburn</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Leo Feng</a>, <a href="/search/cs?searchtype=author&query=Nekoei%2C+H">Hadi Nekoei</a>, <a href="/search/cs?searchtype=author&query=Gottipati%2C+S">SaiKrishna Gottipati</a>, <a href="/search/cs?searchtype=author&query=Vijayan%2C+P">Priyesh Vijayan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prateek Gupta</a>, <a href="/search/cs?searchtype=author&query=Ramp%C3%A1%C5%A1ek%2C+L">Ladislav Ramp谩拧ek</a>, <a href="/search/cs?searchtype=author&query=Avancha%2C+S">Sasikanth Avancha</a>, <a href="/search/cs?searchtype=author&query=Bacon%2C+P">Pierre-Luc Bacon</a>, <a href="/search/cs?searchtype=author&query=Hamilton%2C+W+L">William L. Hamilton</a>, <a href="/search/cs?searchtype=author&query=Paige%2C+B">Brooks Paige</a>, <a href="/search/cs?searchtype=author&query=Misra%2C+S">Sanchit Misra</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01616v1-abstract-short" style="display: inline;"> Despite substantial progress in machine learning for scientific discovery in recent years, truly de novo design of small molecules which exhibit a property of interest remains a significant challenge. We introduce LambdaZero, a generative active learning approach to search for synthesizable molecules. Powered by deep reinforcement learning, LambdaZero learns to search over the vast space of molecu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01616v1-abstract-full').style.display = 'inline'; document.getElementById('2405.01616v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01616v1-abstract-full" style="display: none;"> Despite substantial progress in machine learning for scientific discovery in recent years, truly de novo design of small molecules which exhibit a property of interest remains a significant challenge. We introduce LambdaZero, a generative active learning approach to search for synthesizable molecules. Powered by deep reinforcement learning, LambdaZero learns to search over the vast space of molecules to discover candidates with a desired property. We apply LambdaZero with molecular docking to design novel small molecules that inhibit the enzyme soluble Epoxide Hydrolase 2 (sEH), while enforcing constraints on synthesizability and drug-likeliness. LambdaZero provides an exponential speedup in terms of the number of calls to the expensive molecular docking oracle, and LambdaZero de novo designed molecules reach docking scores that would otherwise require the virtual screening of a hundred billion molecules. Importantly, LambdaZero discovers novel scaffolds of synthesizable, drug-like inhibitors for sEH. In in vitro experimental validation, a series of ligands from a generated quinazoline-based scaffold were synthesized, and the lead inhibitor N-(4,6-di(pyrrolidin-1-yl)quinazolin-2-yl)-N-methylbenzamide (UM0152893) displayed sub-micromolar enzyme inhibition of sEH. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01616v1-abstract-full').style.display = 'none'; document.getElementById('2405.01616v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16556">arXiv:2404.16556</a> <span> [<a href="https://arxiv.org/pdf/2404.16556">pdf</a>, <a href="https://arxiv.org/format/2404.16556">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Conditional Distribution Modelling for Few-Shot Image Synthesis with Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Parul Gupta</a>, <a href="/search/cs?searchtype=author&query=Hayat%2C+M">Munawar Hayat</a>, <a href="/search/cs?searchtype=author&query=Dhall%2C+A">Abhinav Dhall</a>, <a href="/search/cs?searchtype=author&query=Do%2C+T">Thanh-Toan Do</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16556v2-abstract-short" style="display: inline;"> Few-shot image synthesis entails generating diverse and realistic images of novel categories using only a few example images. While multiple recent efforts in this direction have achieved impressive results, the existing approaches are dependent only upon the few novel samples available at test time in order to generate new images, which restricts the diversity of the generated images. To overcome… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16556v2-abstract-full').style.display = 'inline'; document.getElementById('2404.16556v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16556v2-abstract-full" style="display: none;"> Few-shot image synthesis entails generating diverse and realistic images of novel categories using only a few example images. While multiple recent efforts in this direction have achieved impressive results, the existing approaches are dependent only upon the few novel samples available at test time in order to generate new images, which restricts the diversity of the generated images. To overcome this limitation, we propose Conditional Distribution Modelling (CDM) -- a framework which effectively utilizes Diffusion models for few-shot image generation. By modelling the distribution of the latent space used to condition a Diffusion process, CDM leverages the learnt statistics of the training data to get a better approximation of the unseen class distribution, thereby removing the bias arising due to limited number of few shot samples. Simultaneously, we devise a novel inversion based optimization strategy that further improves the approximated unseen class distribution, and ensures the fidelity of the generated samples to the unseen class. The experimental results on four benchmark datasets demonstrate the effectiveness of our proposed CDM for few-shot generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16556v2-abstract-full').style.display = 'none'; document.getElementById('2404.16556v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05859">arXiv:2404.05859</a> <span> [<a href="https://arxiv.org/pdf/2404.05859">pdf</a>, <a href="https://arxiv.org/format/2404.05859">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> </div> </div> <p class="title is-5 mathjax"> Box Filtration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Alvarado%2C+E">Enrique Alvarado</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prashant Gupta</a>, <a href="/search/cs?searchtype=author&query=Krishnamoorthy%2C+B">Bala Krishnamoorthy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05859v3-abstract-short" style="display: inline;"> We define a new framework that unifies the filtration and mapper approaches from TDA, and present efficient algorithms to compute it. Termed the box filtration of a PCD, we grow boxes (hyperrectangles) that are not necessarily centered at each point (in place of balls centered at points). We grow the boxes non-uniformly and asymmetrically in different dimensions based on the distribution of points… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05859v3-abstract-full').style.display = 'inline'; document.getElementById('2404.05859v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05859v3-abstract-full" style="display: none;"> We define a new framework that unifies the filtration and mapper approaches from TDA, and present efficient algorithms to compute it. Termed the box filtration of a PCD, we grow boxes (hyperrectangles) that are not necessarily centered at each point (in place of balls centered at points). We grow the boxes non-uniformly and asymmetrically in different dimensions based on the distribution of points. We present two approaches to handle the boxes: a point cover where each point is assigned its own box at start, and a pixel cover that works with a pixelization of the space of the PCD. Any box cover in either setting automatically gives a mapper of the PCD. We show that the persistence diagrams generated by the box filtration using both point and pixel covers satisfy the classical stability based on the Gromov-Hausdorff distance. Using boxes also implies that the box filtration is identical for pairwise or higher order intersections whereas the VR and Cech filtration are not the same. Growth in each dimension is computed by solving a linear program (LP) that optimizes a cost functional balancing the cost of expansion and benefit of including more points in the box. The box filtration algorithm runs in $O(m|U(0)|\log(mn蟺)L(q))$ time, where $m$ is number of steps of increments considered for box growth, $|U(0)|$ is the number of boxes in the initial cover ($\leq$ number of points), $蟺$ is the step length for increasing each box dimension, each LP is solved in $O(L(q))$ time, $n$ is the PCD dimension, and $q = n \times |X|$. We demonstrate through multiple examples that the box filtration can produce more accurate results to summarize the topology of the PCD than VR and distance-to-measure (DTM) filtrations. Software for our implementation is available at https://github.com/pragup/Box-Filteration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05859v3-abstract-full').style.display = 'none'; document.getElementById('2404.05859v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 55N31; 62R40; </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01746">arXiv:2404.01746</a> <span> [<a href="https://arxiv.org/pdf/2404.01746">pdf</a>, <a href="https://arxiv.org/format/2404.01746">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Scalable & Efficient Interaction-Aware Planning in Autonomous Vehicles using Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Piyush Gupta</a>, <a href="/search/cs?searchtype=author&query=Isele%2C+D">David Isele</a>, <a href="/search/cs?searchtype=author&query=Bae%2C+S">Sangjae Bae</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01746v1-abstract-short" style="display: inline;"> Real-world driving involves intricate interactions among vehicles navigating through dense traffic scenarios. Recent research focuses on enhancing the interaction awareness of autonomous vehicles to leverage these interactions in decision-making. These interaction-aware planners rely on neural-network-based prediction models to capture inter-vehicle interactions, aiming to integrate these predicti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01746v1-abstract-full').style.display = 'inline'; document.getElementById('2404.01746v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01746v1-abstract-full" style="display: none;"> Real-world driving involves intricate interactions among vehicles navigating through dense traffic scenarios. Recent research focuses on enhancing the interaction awareness of autonomous vehicles to leverage these interactions in decision-making. These interaction-aware planners rely on neural-network-based prediction models to capture inter-vehicle interactions, aiming to integrate these predictions with traditional control techniques such as Model Predictive Control. However, this integration of deep learning-based models with traditional control paradigms often results in computationally demanding optimization problems, relying on heuristic methods. This study introduces a principled and efficient method for combining deep learning with constrained optimization, employing knowledge distillation to train smaller and more efficient networks, thereby mitigating complexity. We demonstrate that these refined networks maintain the problem-solving efficacy of larger models while significantly accelerating optimization. Specifically, in the domain of interaction-aware trajectory planning for autonomous vehicles, we illustrate that training a smaller prediction network using knowledge distillation speeds up optimization without sacrificing accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01746v1-abstract-full').style.display = 'none'; document.getElementById('2404.01746v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08848">arXiv:2403.08848</a> <span> [<a href="https://arxiv.org/pdf/2403.08848">pdf</a>, <a href="https://arxiv.org/format/2403.08848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FocusMAE: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Basu%2C+S">Soumen Basu</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Mayuna Gupta</a>, <a href="/search/cs?searchtype=author&query=Madan%2C+C">Chetan Madan</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pankaj Gupta</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+C">Chetan Arora</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08848v2-abstract-short" style="display: inline;"> In recent years, automated Gallbladder Cancer (GBC) detection has gained the attention of researchers. Current state-of-the-art (SOTA) methodologies relying on ultrasound sonography (US) images exhibit limited generalization, emphasizing the need for transformative approaches. We observe that individual US frames may lack sufficient information to capture disease manifestation. This study advocate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08848v2-abstract-full').style.display = 'inline'; document.getElementById('2403.08848v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08848v2-abstract-full" style="display: none;"> In recent years, automated Gallbladder Cancer (GBC) detection has gained the attention of researchers. Current state-of-the-art (SOTA) methodologies relying on ultrasound sonography (US) images exhibit limited generalization, emphasizing the need for transformative approaches. We observe that individual US frames may lack sufficient information to capture disease manifestation. This study advocates for a paradigm shift towards video-based GBC detection, leveraging the inherent advantages of spatiotemporal representations. Employing the Masked Autoencoder (MAE) for representation learning, we address shortcomings in conventional image-based methods. We propose a novel design called FocusMAE to systematically bias the selection of masking tokens from high-information regions, fostering a more refined representation of malignancy. Additionally, we contribute the most extensive US video dataset for GBC detection. We also note that, this is the first study on US video-based GBC detection. We validate the proposed methods on the curated dataset, and report a new state-of-the-art (SOTA) accuracy of 96.4% for the GBC detection problem, against an accuracy of 84% by current Image-based SOTA - GBCNet, and RadFormer, and 94.7% by Video-based SOTA - AdaMAE. We further demonstrate the generality of the proposed FocusMAE on a public CT-based Covid detection dataset, reporting an improvement in accuracy by 3.3% over current baselines. The source code and pretrained models are available at: https://gbc-iitd.github.io/focusmae <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08848v2-abstract-full').style.display = 'none'; document.getElementById('2403.08848v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To Appear at CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.12566">arXiv:2402.12566</a> <span> [<a href="https://arxiv.org/pdf/2402.12566">pdf</a>, <a href="https://arxiv.org/format/2402.12566">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GenAudit: Fixing Factual Errors in Language Model Outputs with Evidence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Krishna%2C+K">Kundan Krishna</a>, <a href="/search/cs?searchtype=author&query=Ramprasad%2C+S">Sanjana Ramprasad</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prakhar Gupta</a>, <a href="/search/cs?searchtype=author&query=Wallace%2C+B+C">Byron C. Wallace</a>, <a href="/search/cs?searchtype=author&query=Lipton%2C+Z+C">Zachary C. Lipton</a>, <a href="/search/cs?searchtype=author&query=Bigham%2C+J+P">Jeffrey P. Bigham</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.12566v3-abstract-short" style="display: inline;"> LLMs can generate factually incorrect statements even when provided access to reference documents. Such errors can be dangerous in high-stakes applications (e.g., document-grounded QA for healthcare or finance). We present GenAudit -- a tool intended to assist fact-checking LLM responses for document-grounded tasks. GenAudit suggests edits to the LLM response by revising or removing claims that ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.12566v3-abstract-full').style.display = 'inline'; document.getElementById('2402.12566v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.12566v3-abstract-full" style="display: none;"> LLMs can generate factually incorrect statements even when provided access to reference documents. Such errors can be dangerous in high-stakes applications (e.g., document-grounded QA for healthcare or finance). We present GenAudit -- a tool intended to assist fact-checking LLM responses for document-grounded tasks. GenAudit suggests edits to the LLM response by revising or removing claims that are not supported by the reference document, and also presents evidence from the reference for facts that do appear to have support. We train models to execute these tasks, and design an interactive interface to present suggested edits and evidence to users. Comprehensive evaluation by human raters shows that GenAudit can detect errors in 8 different LLM outputs when summarizing documents from diverse domains. User studies demonstrate that using GenAudit can substantially improve the performance of humans at finding errors in LLM-generated summaries. We release our tool (GenAudit) and fact-checking model for public use. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.12566v3-abstract-full').style.display = 'none'; document.getElementById('2402.12566v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code and models available at https://genaudit.org</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.15906">arXiv:2401.15906</a> <span> [<a href="https://arxiv.org/pdf/2401.15906">pdf</a>, <a href="https://arxiv.org/format/2401.15906">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Mean Estimation with User-Level Privacy for Spatio-Temporal IoT Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rameshwar%2C+V+A">V. Arvind Rameshwar</a>, <a href="/search/cs?searchtype=author&query=Tandon%2C+A">Anshoo Tandon</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Prajjwal Gupta</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+V">Aditya Vikram Singh</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+N">Novoneel Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Abhay Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.15906v7-abstract-short" style="display: inline;"> This paper considers the problem of the private release of sample means of speed values from traffic datasets. Our key contribution is the development of user-level differentially private algorithms that incorporate carefully chosen parameter values to ensure low estimation errors on real-world datasets, while ensuring privacy. We test our algorithms on ITMS (Intelligent Traffic Management System)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15906v7-abstract-full').style.display = 'inline'; document.getElementById('2401.15906v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.15906v7-abstract-full" style="display: none;"> This paper considers the problem of the private release of sample means of speed values from traffic datasets. Our key contribution is the development of user-level differentially private algorithms that incorporate carefully chosen parameter values to ensure low estimation errors on real-world datasets, while ensuring privacy. We test our algorithms on ITMS (Intelligent Traffic Management System) data from an Indian city, where the speeds of different buses are drawn in a potentially non-i.i.d. manner from an unknown distribution, and where the number of speed samples contributed by different buses is potentially different. We then apply our algorithms to large synthetic datasets, generated based on the ITMS data. Here, we provide theoretical justification for the observed performance trends, and also provide recommendations for the choices of algorithm subroutines that result in low estimation errors. Finally, we characterize the best performance of pseudo-user creation-based algorithms on worst-case datasets via a minimax approach; this then gives rise to a novel procedure for the creation of pseudo-users, which optimizes the worst-case total estimation error. The algorithms discussed in the paper are readily applicable to general spatio-temporal IoT datasets for releasing a differentially private mean of a desired value. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15906v7-abstract-full').style.display = 'none'; document.getElementById('2401.15906v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 5 figures, submitted to the ACM for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.14283">arXiv:2401.14283</a> <span> [<a href="https://arxiv.org/pdf/2401.14283">pdf</a>, <a href="https://arxiv.org/ps/2401.14283">ps</a>, <a href="https://arxiv.org/format/2401.14283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Information Leakage Detection through Approximate Bayes-optimal Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pritha Gupta</a>, <a href="/search/cs?searchtype=author&query=Wever%2C+M">Marcel Wever</a>, <a href="/search/cs?searchtype=author&query=H%C3%BCllermeier%2C+E">Eyke H眉llermeier</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.14283v2-abstract-short" style="display: inline;"> In today's data-driven world, the proliferation of publicly available information raises security concerns due to the information leakage (IL) problem. IL involves unintentionally exposing sensitive information to unauthorized parties via observable system information. Conventional statistical approaches rely on estimating mutual information (MI) between observable and secret information for detec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14283v2-abstract-full').style.display = 'inline'; document.getElementById('2401.14283v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.14283v2-abstract-full" style="display: none;"> In today's data-driven world, the proliferation of publicly available information raises security concerns due to the information leakage (IL) problem. IL involves unintentionally exposing sensitive information to unauthorized parties via observable system information. Conventional statistical approaches rely on estimating mutual information (MI) between observable and secret information for detecting ILs, face challenges of the curse of dimensionality, convergence, computational complexity, and MI misestimation. Though effective, emerging supervised machine learning based approaches to detect ILs are limited to binary system sensitive information and lack a comprehensive framework. To address these limitations, we establish a theoretical framework using statistical learning theory and information theory to quantify and detect IL accurately. Using automated machine learning, we demonstrate that MI can be accurately estimated by approximating the typically unknown Bayes predictor's log-loss and accuracy. Based on this, we show how MI can effectively be estimated to detect ILs. Our method performs superior to state-of-the-art baselines in an empirical study considering synthetic and real-world OpenSSL TLS server datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14283v2-abstract-full').style.display = 'none'; document.getElementById('2401.14283v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under submission in Information Sciences</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 94A15; 62H30; 94A60 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.5.1; G.3; E.3 </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Gupta%2C+P&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository