<!-- CINXE.COM (scraper residue; commented out — bare text before the doctype forces browsers into quirks mode) -->

<!-- Search | arXiv e-print repository (duplicated page title from extraction; the real <title> is in <head> below) -->

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 967 results for author: <span class="mathjax">Singh, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Singh%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..."
type="text" value="Singh, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Singh%2C+A&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Singh, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14341">arXiv:2411.14341</a> <span>&nbsp;[<a
href="https://arxiv.org/pdf/2411.14341">pdf</a>, <a href="https://arxiv.org/format/2411.14341">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Logarithmic Neyman Regret for Adaptive Estimation of the Average Treatment Effect </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Neopane%2C+O">Ojash Neopane</a>, <a href="/search/cs?searchtype=author&amp;query=Ramdas%2C+A">Aaditya Ramdas</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Aarti Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14341v1-abstract-short" style="display: inline;"> Estimation of the Average Treatment Effect (ATE) is a core problem in causal inference with strong connections to Off-Policy Evaluation in Reinforcement Learning. This paper considers the problem of adaptively selecting the treatment allocation probability in order to improve estimation of the ATE. The majority of prior work on adaptive ATE estimation focus on asymptotic guarantees, and in turn ov&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14341v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14341v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14341v1-abstract-full" style="display: none;"> Estimation of the Average Treatment Effect (ATE) is a core problem in causal inference with strong connections to Off-Policy Evaluation in Reinforcement Learning. 
This paper considers the problem of adaptively selecting the treatment allocation probability in order to improve estimation of the ATE. The majority of prior work on adaptive ATE estimation focus on asymptotic guarantees, and in turn overlooks important practical considerations such as the difficulty of learning the optimal treatment allocation as well as hyper-parameter selection. Existing non-asymptotic methods are limited by poor empirical performance and exponential scaling of the Neyman regret with respect to problem parameters. In order to address these gaps, we propose and analyze the Clipped Second Moment Tracking (ClipSMT) algorithm, a variant of an existing algorithm with strong asymptotic optimality guarantees, and provide finite sample bounds on its Neyman regret. Our analysis shows that ClipSMT achieves exponential improvements in Neyman regret on two fronts: improving the dependence on $T$ from $O(\sqrt{T})$ to $O(\log T)$, as well as reducing the exponential dependence on problem parameters to a polynomial dependence. Finally, we conclude with simulations which show the marked improvement of ClipSMT over existing approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14341v1-abstract-full').style.display = 'none'; document.getElementById('2411.14341v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 2 figures. 
Submitted to AISTATS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14199">arXiv:2411.14199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14199">pdf</a>, <a href="https://arxiv.org/format/2411.14199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OpenScholar: Synthesizing Scientific Literature with Retrieval-augmented LMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Asai%2C+A">Akari Asai</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jacqueline He</a>, <a href="/search/cs?searchtype=author&amp;query=Shao%2C+R">Rulin Shao</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Amanpreet Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+J+C">Joseph Chee Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Lo%2C+K">Kyle Lo</a>, <a href="/search/cs?searchtype=author&amp;query=Soldaini%2C+L">Luca Soldaini</a>, <a href="/search/cs?searchtype=author&amp;query=Feldman%2C+S">Sergey Feldman</a>, <a href="/search/cs?searchtype=author&amp;query=D%27arcy%2C+M">Mike D&#39;arcy</a>, <a href="/search/cs?searchtype=author&amp;query=Wadden%2C+D">David Wadden</a>, <a 
href="/search/cs?searchtype=author&amp;query=Latzke%2C+M">Matt Latzke</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+M">Minyang Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+P">Pan Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shengyan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+H">Hao Tong</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+B">Bohao Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Y">Yanyu Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Zettlemoyer%2C+L">Luke Zettlemoyer</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Weld%2C+D">Dan Weld</a>, <a href="/search/cs?searchtype=author&amp;query=Downey%2C+D">Doug Downey</a>, <a href="/search/cs?searchtype=author&amp;query=Yih%2C+W">Wen-tau Yih</a>, <a href="/search/cs?searchtype=author&amp;query=Koh%2C+P+W">Pang Wei Koh</a>, <a href="/search/cs?searchtype=author&amp;query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14199v1-abstract-short" style="display: inline;"> Scientific progress depends on researchers&#39; ability to synthesize the growing body of literature. Can large language models (LMs) assist scientists in this task? We introduce OpenScholar, a specialized retrieval-augmented LM that answers scientific queries by identifying relevant passages from 45 million open-access papers and synthesizing citation-backed responses. 
To evaluate OpenScholar, we dev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14199v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14199v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14199v1-abstract-full" style="display: none;"> Scientific progress depends on researchers&#39; ability to synthesize the growing body of literature. Can large language models (LMs) assist scientists in this task? We introduce OpenScholar, a specialized retrieval-augmented LM that answers scientific queries by identifying relevant passages from 45 million open-access papers and synthesizing citation-backed responses. To evaluate OpenScholar, we develop ScholarQABench, the first large-scale multi-domain benchmark for literature search, comprising 2,967 expert-written queries and 208 long-form answers across computer science, physics, neuroscience, and biomedicine. On ScholarQABench, OpenScholar-8B outperforms GPT-4o by 5% and PaperQA2 by 7% in correctness, despite being a smaller, open model. While GPT4o hallucinates citations 78 to 90% of the time, OpenScholar achieves citation accuracy on par with human experts. OpenScholar&#39;s datastore, retriever, and self-feedback inference loop also improves off-the-shelf LMs: for instance, OpenScholar-GPT4o improves GPT-4o&#39;s correctness by 12%. In human evaluations, experts preferred OpenScholar-8B and OpenScholar-GPT4o responses over expert-written ones 51% and 70% of the time, respectively, compared to GPT4o&#39;s 32%. We open-source all of our code, models, datastore, data and a public demo. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14199v1-abstract-full').style.display = 'none'; document.getElementById('2411.14199v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14100">arXiv:2411.14100</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14100">pdf</a>, <a href="https://arxiv.org/format/2411.14100">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> BEST-STD: Bidirectional Mamba-Enhanced Speech Tokenization for Spoken Term Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Anup Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Demuynck%2C+K">Kris Demuynck</a>, <a href="/search/cs?searchtype=author&amp;query=Arora%2C+V">Vipul Arora</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14100v1-abstract-short" style="display: inline;"> Spoken term detection (STD) is often hindered by reliance on frame-level features and the computationally intensive DTW-based template matching, limiting its practicality. 
To address these challenges, we propose a novel approach that encodes speech into discrete, speaker-agnostic semantic tokens. This facilitates fast retrieval using text-based search algorithms and effectively handles out-of-voca&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14100v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14100v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14100v1-abstract-full" style="display: none;"> Spoken term detection (STD) is often hindered by reliance on frame-level features and the computationally intensive DTW-based template matching, limiting its practicality. To address these challenges, we propose a novel approach that encodes speech into discrete, speaker-agnostic semantic tokens. This facilitates fast retrieval using text-based search algorithms and effectively handles out-of-vocabulary terms. Our approach focuses on generating consistent token sequences across varying utterances of the same term. We also propose a bidirectional state space modeling within the Mamba encoder, trained in a self-supervised learning framework, to learn contextual frame-level features that are further encoded into discrete tokens. Our analysis shows that our speech tokens exhibit greater speaker invariance than those from existing tokenizers, making them more suitable for STD tasks. Empirical evaluation on LibriSpeech and TIMIT databases indicates that our method outperforms existing STD baselines while being more efficient. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14100v1-abstract-full').style.display = 'none'; document.getElementById('2411.14100v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12681">arXiv:2411.12681</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.12681">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AI Guided Early Screening of Cervical Cancer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=S%2C+D">Dharanidharan S I</a>, <a href="/search/cs?searchtype=author&amp;query=S%2C+S+R">Suhitha Renuka S V</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ajishi Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Pravin%2C+S+C">Sheena Christabel Pravin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12681v1-abstract-short" style="display: inline;"> In 
order to support the creation of reliable machine learning models for anomaly detection, this project focuses on preprocessing, enhancing, and organizing a medical imaging dataset. There are two classifications in the dataset: normal and abnormal, along with extra noise fluctuations. In order to improve the photographs&#39; quality, undesirable artifacts, including visible medical equipment at the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12681v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12681v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12681v1-abstract-full" style="display: none;"> In order to support the creation of reliable machine learning models for anomaly detection, this project focuses on preprocessing, enhancing, and organizing a medical imaging dataset. There are two classifications in the dataset: normal and abnormal, along with extra noise fluctuations. In order to improve the photographs&#39; quality, undesirable artifacts, including visible medical equipment at the edges, were eliminated using central cropping. Adjusting the brightness and contrast was one of the additional preprocessing processes. Normalization was then performed to normalize the data. To make classification jobs easier, the dataset was methodically handled by combining several image subsets into two primary categories: normal and pathological. To provide a strong training set that adapts well to real-world situations, sophisticated picture preprocessing techniques were used, such as contrast enhancement and real-time augmentation (including rotations, zooms, and brightness modifications). To guarantee efficient model evaluation, the data was subsequently divided into training and testing subsets. 
In order to create precise and effective machine learning models for medical anomaly detection, high-quality input data is ensured via this thorough approach. Because of the project pipeline&#39;s flexible and scalable design, it can be easily integrated with bigger clinical decision-support systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12681v1-abstract-full').style.display = 'none'; document.getElementById('2411.12681v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09204">arXiv:2411.09204</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09204">pdf</a>, <a href="https://arxiv.org/format/2411.09204">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> RibCageImp: A Deep Learning Framework for 3D Ribcage Implant Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chaubey%2C+G">Gyanendra Chaubey</a>, <a href="/search/cs?searchtype=author&amp;query=Farooq%2C+A">Aiman Farooq</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Azad Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+D">Deepak Mishra</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09204v1-abstract-short" style="display: inline;"> The recovery of damaged or resected ribcage structures requires precise, custom-designed implants to restore the integrity and functionality of the thoracic cavity. Traditional implant design methods rely mainly on manual processes, making them time-consuming and susceptible to variability. In this work, we explore the feasibility of automated ribcage implant generation using deep learning. We pre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09204v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09204v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09204v1-abstract-full" style="display: none;"> The recovery of damaged or resected ribcage structures requires precise, custom-designed implants to restore the integrity and functionality of the thoracic cavity. Traditional implant design methods rely mainly on manual processes, making them time-consuming and susceptible to variability. In this work, we explore the feasibility of automated ribcage implant generation using deep learning. We present a framework based on 3D U-Net architecture that processes CT scans to generate patient-specific implant designs. To the best of our knowledge, this is the first investigation into automated thoracic implant generation using deep learning approaches. Our preliminary results, while moderate, highlight both the potential and the significant challenges in this complex domain. These findings establish a foundation for future research in automated ribcage reconstruction and identify key technical challenges that need to be addressed for practical implementation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09204v1-abstract-full').style.display = 'none'; document.getElementById('2411.09204v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06251">arXiv:2411.06251</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06251">pdf</a>, <a href="https://arxiv.org/format/2411.06251">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Quasi-random Multi-Sample Inference for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Parashar%2C+A">Aditya Parashar</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+V">Aditya Vikram Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Amballa%2C+A">Avinash Amballa</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+J">Jinlin Lai</a>, <a href="/search/cs?searchtype=author&amp;query=Rozonoyer%2C+B">Benjamin Rozonoyer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06251v1-abstract-short" style="display: inline;"> Large language models (LLMs) are often equipped with multi-sample decoding strategies. 
An LLM implicitly defines an arithmetic code book, facilitating efficient and embarrassingly parallelizable \textbf{arithmetic sampling} to produce multiple samples using quasi-random codes. Traditional text generation methods, such as beam search and sampling-based techniques, have notable limitations: they lac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06251v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06251v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06251v1-abstract-full" style="display: none;"> Large language models (LLMs) are often equipped with multi-sample decoding strategies. An LLM implicitly defines an arithmetic code book, facilitating efficient and embarrassingly parallelizable \textbf{arithmetic sampling} to produce multiple samples using quasi-random codes. Traditional text generation methods, such as beam search and sampling-based techniques, have notable limitations: they lack parallelizability or diversity of sampled sequences. This study explores the potential of arithmetic sampling, contrasting it with ancestral sampling across two decoding tasks that employ multi-sample inference: chain-of-thought reasoning with self-consistency and machine translation with minimum Bayes risk decoding. Our results demonstrate that arithmetic sampling produces more diverse samples, significantly improving reasoning and translation performance as the sample size increases. We observe a $\mathbf{3\text{-}5\%}$ point increase in accuracy on the GSM8K dataset and a $\mathbf{0.45\text{-}0.89\%}$ point increment in COMET score for WMT19 tasks using arithmetic sampling without any significant computational overhead. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06251v1-abstract-full').style.display = 'none'; document.getElementById('2411.06251v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05734">arXiv:2411.05734</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05734">pdf</a>, <a href="https://arxiv.org/format/2411.05734">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Poze: Sports Technique Feedback under Data Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Agamdeep Singh</a>, <a href="/search/cs?searchtype=author&amp;query=PB%2C+S">Sujit PB</a>, <a href="/search/cs?searchtype=author&amp;query=Vatsa%2C+M">Mayank Vatsa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05734v1-abstract-short" style="display: inline;"> Access to expert coaching is essential for developing technique in sports, yet economic barriers often place it out of reach for many enthusiasts. To bridge this gap, we introduce Poze, an innovative video processing framework that provides feedback on human motion, emulating the insights of a professional coach. 
Poze combines pose estimation with sequence comparison and is optimized to function e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05734v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05734v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05734v1-abstract-full" style="display: none;"> Access to expert coaching is essential for developing technique in sports, yet economic barriers often place it out of reach for many enthusiasts. To bridge this gap, we introduce Poze, an innovative video processing framework that provides feedback on human motion, emulating the insights of a professional coach. Poze combines pose estimation with sequence comparison and is optimized to function effectively with minimal data. Poze surpasses state-of-the-art vision-language models in video question-answering frameworks, achieving 70% and 196% increase in accuracy over GPT4V and LLaVAv1.6 7b, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05734v1-abstract-full').style.display = 'none'; document.getElementById('2411.05734v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05368">arXiv:2411.05368</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05368">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s11277-024-11114-2">10.1007/s11277-024-11114-2 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Comparative Study of MAC Protocols for Wireless Mesh Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ankita Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Prakash%2C+S">Shiv Prakash</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+S">Sudhakar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05368v1-abstract-short" style="display: inline;"> Wireless networking is encouraged by the constant enhancement of sensors&#39; ability and wireless communication. To provide service quality support for multimedia viz. audio and video streams, the IEEE 802.11e MAC (Media Access Control) improves basic 802.11 MAC. 
IEEE 802.11 standard series such as IEEE 802.11a, b, g, n, p, and ac have been promoted and specified in the current communications and con&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05368v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05368v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05368v1-abstract-full" style="display: none;"> Wireless networking is encouraged by the constant enhancement of sensors&#39; ability and wireless communication. To provide service quality support for multimedia viz. audio and video streams, the IEEE 802.11e MAC (Media Access Control) improves basic 802.11 MAC. IEEE 802.11 standard series such as IEEE 802.11a, b, g, n, p, and ac have been promoted and specified in the current communications and connection development. Each standard has functionality that matches the kind of applications for which the standard is intended. IEEE 802.11ac has better performance with fewer interferences and achieves gigabits per second capacity transfer rates. This paper discusses the comparative examination of the IEEE 802.11a, IEEE 802.11b, IEEE 802.11g, IEEE 802.11n, IEEE 802.11p, and IEEE 802.11ac standards which increase accuracy and performance pertaining to the IEEE 802.11 standard. In this paper, we investigate the design requirements for numerous simultaneous peer-to-peer connections. Further, this study offers a systematic review and analysis of the MAC layer in WMN (Wireless Mesh Network) and also highlights their open research issues and challenges. Finally, this paper discusses various potential directions for future research in this area with an emphasis on their strengths and limitations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05368v1-abstract-full').style.display = 'none'; document.getElementById('2411.05368v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 5 figures, to be published in Wireless Pers Commun</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> D-22-00117 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Wireless Pers Commun 135, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05359">arXiv:2411.05359</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05359">pdf</a>, <a href="https://arxiv.org/format/2411.05359">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Agricultural Landscape Understanding At Country-Scale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dua%2C+R">Radhika Dua</a>, <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+N">Nikita Saxena</a>, <a 
href="/search/cs?searchtype=author&amp;query=Agarwal%2C+A">Aditi Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Wilson%2C+A">Alex Wilson</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+G">Gaurav Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+H">Hoang Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Deshpande%2C+I">Ishan Deshpande</a>, <a href="/search/cs?searchtype=author&amp;query=Kaur%2C+A">Amandeep Kaur</a>, <a href="/search/cs?searchtype=author&amp;query=Aggarwal%2C+G">Gaurav Aggarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Nath%2C+C">Chandan Nath</a>, <a href="/search/cs?searchtype=author&amp;query=Basu%2C+A">Arnab Basu</a>, <a href="/search/cs?searchtype=author&amp;query=Batchu%2C+V">Vishal Batchu</a>, <a href="/search/cs?searchtype=author&amp;query=Holla%2C+S">Sharath Holla</a>, <a href="/search/cs?searchtype=author&amp;query=Kurle%2C+B">Bindiya Kurle</a>, <a href="/search/cs?searchtype=author&amp;query=Missura%2C+O">Olana Missura</a>, <a href="/search/cs?searchtype=author&amp;query=Aggarwal%2C+R">Rahul Aggarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Garg%2C+S">Shubhika Garg</a>, <a href="/search/cs?searchtype=author&amp;query=Shah%2C+N">Nishi Shah</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Avneet Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Tewari%2C+D">Dinesh Tewari</a>, <a href="/search/cs?searchtype=author&amp;query=Dondzik%2C+A">Agata Dondzik</a>, <a href="/search/cs?searchtype=author&amp;query=Adsul%2C+B">Bharat Adsul</a>, <a href="/search/cs?searchtype=author&amp;query=Sohoni%2C+M">Milind Sohoni</a>, <a href="/search/cs?searchtype=author&amp;query=Praveen%2C+A+R">Asim Rama Praveen</a>, <a href="/search/cs?searchtype=author&amp;query=Dangi%2C+A">Aaryan Dangi</a> , et al. 
(10 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05359v1-abstract-short" style="display: inline;"> Agricultural landscapes are quite complex, especially in the Global South where fields are smaller, and agricultural practices are more varied. In this paper we report on our progress in digitizing the agricultural landscape (natural and man-made) in our study region of India. We use high resolution imagery and a UNet style segmentation model to generate the first of its kind national-scale multi-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05359v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05359v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05359v1-abstract-full" style="display: none;"> Agricultural landscapes are quite complex, especially in the Global South where fields are smaller, and agricultural practices are more varied. In this paper we report on our progress in digitizing the agricultural landscape (natural and man-made) in our study region of India. We use high resolution imagery and a UNet style segmentation model to generate the first of its kind national-scale multi-class panoptic segmentation output. Through this work we have been able to identify individual fields across 151.7M hectares, and delineating key features such as water resources and vegetation. We share how this output was validated by our team and externally by downstream users, including some sample use cases that can lead to targeted data driven decision making. We believe this dataset will contribute towards digitizing agriculture by generating the foundational baselayer. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05359v1-abstract-full').style.display = 'none'; document.getElementById('2411.05359v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">34 pages, 7 tables, 15 figs</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04512">arXiv:2411.04512</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04512">pdf</a>, <a href="https://arxiv.org/format/2411.04512">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Normalized Space Alignment: A Versatile Metric for Representation Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ebadulla%2C+D">Danish Ebadulla</a>, <a href="/search/cs?searchtype=author&amp;query=Gulati%2C+A">Aditya Gulati</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ambuj Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04512v1-abstract-short" style="display: inline;"> We introduce a manifold analysis technique for neural network representations. 
Normalized Space Alignment (NSA) compares pairwise distances between two point clouds derived from the same source and having the same size, while potentially possessing differing dimensionalities. NSA can act as both an analytical tool and a differentiable loss function, providing a robust means of comparing and aligni&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04512v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04512v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04512v1-abstract-full" style="display: none;"> We introduce a manifold analysis technique for neural network representations. Normalized Space Alignment (NSA) compares pairwise distances between two point clouds derived from the same source and having the same size, while potentially possessing differing dimensionalities. NSA can act as both an analytical tool and a differentiable loss function, providing a robust means of comparing and aligning representations across different layers and models. It satisfies the criteria necessary for both a similarity metric and a neural network loss function. We showcase NSA&#39;s versatility by illustrating its utility as a representation space analysis metric, a structure-preserving loss function, and a robustness analysis tool. NSA is not only computationally efficient but it can also approximate the global structural discrepancy during mini-batching, facilitating its use in a wide variety of neural network training paradigms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04512v1-abstract-full').style.display = 'none'; document.getElementById('2411.04512v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03923">arXiv:2411.03923</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03923">pdf</a>, <a href="https://arxiv.org/format/2411.03923">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluation data contamination in LLMs: how do we measure it and (when) does it matter? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Aaditya K. 
Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Kocyigit%2C+M+Y">Muhammed Yusuf Kocyigit</a>, <a href="/search/cs?searchtype=author&amp;query=Poulton%2C+A">Andrew Poulton</a>, <a href="/search/cs?searchtype=author&amp;query=Esiobu%2C+D">David Esiobu</a>, <a href="/search/cs?searchtype=author&amp;query=Lomeli%2C+M">Maria Lomeli</a>, <a href="/search/cs?searchtype=author&amp;query=Szilvasy%2C+G">Gergely Szilvasy</a>, <a href="/search/cs?searchtype=author&amp;query=Hupkes%2C+D">Dieuwke Hupkes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03923v1-abstract-short" style="display: inline;"> Hampering the interpretation of benchmark scores, evaluation data contamination has become a growing concern in the evaluation of LLMs, and an active area of research studies its effects. While evaluation data contamination is easily understood intuitively, it is surprisingly difficult to define precisely which samples should be considered contaminated and, consequently, how it impacts benchmark s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03923v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03923v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03923v1-abstract-full" style="display: none;"> Hampering the interpretation of benchmark scores, evaluation data contamination has become a growing concern in the evaluation of LLMs, and an active area of research studies its effects. While evaluation data contamination is easily understood intuitively, it is surprisingly difficult to define precisely which samples should be considered contaminated and, consequently, how it impacts benchmark scores. 
We propose that these questions should be addressed together and that contamination metrics can be assessed based on whether models benefit from the examples they mark contaminated. We propose a novel analysis method called ConTAM, and show with a large scale survey of existing and novel n-gram based contamination metrics across 13 benchmarks and 7 models from 2 different families that ConTAM can be used to better understand evaluation data contamination and its effects. We find that contamination may have a much larger effect than reported in recent LLM releases and benefits models differently at different scales. We also find that considering only the longest contaminated substring provides a better signal than considering a union of all contaminated substrings, and that doing model and benchmark specific threshold analysis greatly increases the specificity of the results. Lastly, we investigate the impact of hyperparameter choices, finding that, among other things, both using larger values of n and disregarding matches that are infrequent in the pre-training data lead to many false negatives. With ConTAM, we provide a method to empirically ground evaluation data contamination metrics in downstream effects. With our exploration, we shed light on how evaluation data contamination can impact LLMs and provide insight into the considerations important when doing contamination analysis. We end our paper by discussing these in more detail and providing concrete suggestions for future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03923v1-abstract-full').style.display = 'none'; document.getElementById('2411.03923v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01405">arXiv:2411.01405</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.01405">pdf</a>, <a href="https://arxiv.org/format/2411.01405">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Computing Experiment-Constrained D-Optimal Designs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pillai%2C+A">Aditya Pillai</a>, <a href="/search/cs?searchtype=author&amp;query=Ponte%2C+G">Gabriel Ponte</a>, <a href="/search/cs?searchtype=author&amp;query=Fampa%2C+M">Marcia Fampa</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Jon Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+a+M">Mohit Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+W">Weijun Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01405v1-abstract-short" style="display: inline;"> In optimal experimental design, the objective is to select a limited set of experiments that maximizes information about unknown model parameters based on factor levels. This work addresses the generalized D-optimal design problem, allowing for nonlinear relationships in factor levels. 
We develop scalable algorithms suitable for cases where the number of candidate experiments grows exponentially w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01405v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01405v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01405v1-abstract-full" style="display: none;"> In optimal experimental design, the objective is to select a limited set of experiments that maximizes information about unknown model parameters based on factor levels. This work addresses the generalized D-optimal design problem, allowing for nonlinear relationships in factor levels. We develop scalable algorithms suitable for cases where the number of candidate experiments grows exponentially with the factor dimension, focusing on both first- and second-order models under design constraints. Particularly, our approach integrates convex relaxation with pricing-based local search techniques, which can provide upper bounds and performance guarantees. Unlike traditional local search methods, such as the ``Fedorov exchange&#34; and its variants, our method effectively accommodates arbitrary side constraints in the design space. Furthermore, it yields both a feasible solution and an upper bound on the optimal value derived from the convex relaxation. 
Numerical results highlight the efficiency and scalability of our algorithms, demonstrating superior performance compared to the state-of-the-art commercial software, JMP <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01405v1-abstract-full').style.display = 'none'; document.getElementById('2411.01405v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22029">arXiv:2410.22029</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22029">pdf</a>, <a href="https://arxiv.org/format/2410.22029">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Are VLMs Really Blind </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ayush Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+M">Mansi Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Garg%2C+S">Shivank Garg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22029v1-abstract-short" style="display: inline;"> Vision Language Models excel in handling a wide range of complex tasks, including Optical Character Recognition (OCR), Visual Question Answering (VQA), and advanced geometric reasoning. 
However, these models fail to perform well on low-level basic visual tasks which are especially easy for humans. Our goal in this work was to determine if these models are truly &#34;blind&#34; to geometric reasoning or if&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22029v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22029v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22029v1-abstract-full" style="display: none;"> Vision Language Models excel in handling a wide range of complex tasks, including Optical Character Recognition (OCR), Visual Question Answering (VQA), and advanced geometric reasoning. However, these models fail to perform well on low-level basic visual tasks which are especially easy for humans. Our goal in this work was to determine if these models are truly &#34;blind&#34; to geometric reasoning or if there are ways to enhance their capabilities in this area. Our work presents a novel automatic pipeline designed to extract key information from images in response to specific questions. Instead of just relying on direct VQA, we use question-derived keywords to create a caption that highlights important details in the image related to the question. This caption is then used by a language model to provide a precise answer to the question without requiring external fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22029v1-abstract-full').style.display = 'none'; document.getElementById('2410.22029v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20036">arXiv:2410.20036</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20036">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Architectural Flaw Detection in Civil Engineering Using GPT-4 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+S">Saket Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Ehtesham%2C+A">Abul Ehtesham</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Aditi Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Khoei%2C+T+T">Tala Talaei Khoei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20036v1-abstract-short" style="display: inline;"> The application of artificial intelligence (AI) in civil engineering presents a transformative approach to enhancing design quality and safety. This paper investigates the potential of the advanced LLM GPT4 Turbo vision model in detecting architectural flaws during the design phase, with a specific focus on identifying missing doors and windows. 
The study evaluates the model&#39;s performance through&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20036v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20036v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20036v1-abstract-full" style="display: none;"> The application of artificial intelligence (AI) in civil engineering presents a transformative approach to enhancing design quality and safety. This paper investigates the potential of the advanced LLM GPT4 Turbo vision model in detecting architectural flaws during the design phase, with a specific focus on identifying missing doors and windows. The study evaluates the model&#39;s performance through metrics such as precision, recall, and F1 score, demonstrating AI&#39;s effectiveness in accurately detecting flaws compared to human-verified data. Additionally, the research explores AI&#39;s broader capabilities, including identifying load-bearing issues, material weaknesses, and ensuring compliance with building codes. The findings highlight how AI can significantly improve design accuracy, reduce costly revisions, and support sustainable practices, ultimately revolutionizing the civil engineering field by ensuring safer, more efficient, and aesthetically optimized structures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20036v1-abstract-full').style.display = 'none'; document.getElementById('2410.20036v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20011">arXiv:2410.20011</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20011">pdf</a>, <a href="https://arxiv.org/format/2410.20011">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Small Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Van+Nguyen%2C+C">Chien Van Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+X">Xuan Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Aponte%2C+R">Ryan Aponte</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yu Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Basu%2C+S">Samyadeep Basu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Z">Zhengmian Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&amp;query=Kunapuli%2C+S">Sasidhar Kunapuli</a>, <a href="/search/cs?searchtype=author&amp;query=Barrow%2C+J">Joe Barrow</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Junda Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ashish Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+J">Jiuxiang Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Dernoncourt%2C+F">Franck Dernoncourt</a>, <a href="/search/cs?searchtype=author&amp;query=Ahmed%2C+N+K">Nesreen K. 
Ahmed</a>, <a href="/search/cs?searchtype=author&amp;query=Lipka%2C+N">Nedim Lipka</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+R">Ruiyi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+T">Tong Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+S">Sungchul Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Deilamsalehy%2C+H">Hanieh Deilamsalehy</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+N">Namyong Park</a>, <a href="/search/cs?searchtype=author&amp;query=Rimer%2C+M">Mike Rimer</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhehao Zhang</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20011v1-abstract-short" style="display: inline;"> Small Language Models (SLMs) have become increasingly important due to their efficiency and performance to perform various language tasks with minimal computational resources, making them ideal for various settings including on-device, mobile, edge devices, among many others. 
In this article, we present a comprehensive survey on SLMs, focusing on their architectures, training techniques, and model&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20011v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20011v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20011v1-abstract-full" style="display: none;"> Small Language Models (SLMs) have become increasingly important due to their efficiency and performance to perform various language tasks with minimal computational resources, making them ideal for various settings including on-device, mobile, edge devices, among many others. In this article, we present a comprehensive survey on SLMs, focusing on their architectures, training techniques, and model compression techniques. We propose a novel taxonomy for categorizing the methods used to optimize SLMs, including model compression, pruning, and quantization techniques. We summarize the benchmark datasets that are useful for benchmarking SLMs along with the evaluation metrics commonly used. Additionally, we highlight key open challenges that remain to be addressed. Our survey aims to serve as a valuable resource for researchers and practitioners interested in developing and deploying small yet efficient language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20011v1-abstract-full').style.display = 'none'; document.getElementById('2410.20011v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19858">arXiv:2410.19858</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19858">pdf</a>, <a href="https://arxiv.org/format/2410.19858">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Deep Learning based RMT Data Inversion using Gaussian Random Field </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ghosal%2C+K">Koustav Ghosal</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Arun Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Malakar%2C+S">Samir Malakar</a>, <a href="/search/cs?searchtype=author&amp;query=Srivastava%2C+S">Shalivahan Srivastava</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+D">Deepak Gupta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19858v1-abstract-short" style="display: inline;"> Deep learning (DL) methods have emerged as a powerful tool for the inversion of geophysical data. When applied to field data, these models often struggle without additional fine-tuning of the network. This is because they are built on the assumption that the statistical patterns in the training and test datasets are the same. 
To address this, we propose a DL-based inversion scheme for Radio Magnet&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19858v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19858v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19858v1-abstract-full" style="display: none;"> Deep learning (DL) methods have emerged as a powerful tool for the inversion of geophysical data. When applied to field data, these models often struggle without additional fine-tuning of the network. This is because they are built on the assumption that the statistical patterns in the training and test datasets are the same. To address this, we propose a DL-based inversion scheme for Radio Magnetotelluric data where the subsurface resistivity models are generated using Gaussian Random Fields (GRF). The network&#39;s generalization ability was tested with an out-of-distribution (OOD) dataset comprising a homogeneous background and various rectangular-shaped anomalous bodies. After end-to-end training with the GRF dataset, the pre-trained network successfully identified anomalies in the OOD dataset. Synthetic experiments confirmed that the GRF dataset enhances generalization compared to a homogeneous background OOD dataset. The network accurately recovered structures in a checkerboard resistivity model, and demonstrated robustness to noise, outperforming traditional gradient-based methods. Finally, the developed scheme is tested using exemplary field data from a waste site near Roorkee, India. The proposed scheme enhances generalization in a data-driven supervised learning framework, suggesting a promising direction for OOD generalization in DL methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19858v1-abstract-full').style.display = 'none'; document.getElementById('2410.19858v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19712">arXiv:2410.19712</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19712">pdf</a>, <a href="https://arxiv.org/format/2410.19712">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> DA-VIL: Adaptive Dual-Arm Manipulation with Reinforcement Learning and Variable Impedance Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Karim%2C+M+F">Md Faizal Karim</a>, <a href="/search/cs?searchtype=author&amp;query=Bollimuntha%2C+S">Shreya Bollimuntha</a>, <a href="/search/cs?searchtype=author&amp;query=Hashmi%2C+M+S">Mohammed Saad Hashmi</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+A">Autrio Das</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+G">Gaurav Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Sridhar%2C+S">Srinath Sridhar</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Govindan%2C+N">Nagamanikandan Govindan</a>, <a href="/search/cs?searchtype=author&amp;query=Krishna%2C+K+M">K Madhava Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2410.19712v1-abstract-short" style="display: inline;"> Dual-arm manipulation is an area of growing interest in the robotics community. Enabling robots to perform tasks that require the coordinated use of two arms, is essential for complex manipulation tasks such as handling large objects, assembling components, and performing human-like interactions. However, achieving effective dual-arm manipulation is challenging due to the need for precise coordina&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19712v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19712v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19712v1-abstract-full" style="display: none;"> Dual-arm manipulation is an area of growing interest in the robotics community. Enabling robots to perform tasks that require the coordinated use of two arms, is essential for complex manipulation tasks such as handling large objects, assembling components, and performing human-like interactions. However, achieving effective dual-arm manipulation is challenging due to the need for precise coordination, dynamic adaptability, and the ability to manage interaction forces between the arms and the objects being manipulated. We propose a novel pipeline that combines the advantages of policy learning based on environment feedback and gradient-based optimization to learn controller gains required for the control outputs. This allows the robotic system to dynamically modulate its impedance in response to task demands, ensuring stability and dexterity in dual-arm operations. We evaluate our pipeline on a trajectory-tracking task involving a variety of large, complex objects with different masses and geometries. 
The performance is then compared to three other established methods for controlling dual-arm robots, demonstrating superior results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19712v1-abstract-full').style.display = 'none'; document.getElementById('2410.19712v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19151">arXiv:2410.19151</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.19151">pdf</a>, <a href="https://arxiv.org/format/2410.19151">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CapsuleNet: A Deep Learning Model To Classify GI Diseases Using EfficientNet-b7 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Das%2C+A">Aniket Das</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ayushman Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Nishant"> Nishant</a>, <a href="/search/cs?searchtype=author&amp;query=Prakash%2C+S">Sharad Prakash</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19151v1-abstract-short" style="display: inline;"> Gastrointestinal (GI) diseases represent a significant global health concern, with Capsule 
Endoscopy (CE) offering a non-invasive method for diagnosis by capturing a large number of GI tract images. However, the sheer volume of video frames necessitates automated analysis to reduce the workload on doctors and increase the diagnostic accuracy. In this paper, we present CapsuleNet, a deep learning m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19151v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19151v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19151v1-abstract-full" style="display: none;"> Gastrointestinal (GI) diseases represent a significant global health concern, with Capsule Endoscopy (CE) offering a non-invasive method for diagnosis by capturing a large number of GI tract images. However, the sheer volume of video frames necessitates automated analysis to reduce the workload on doctors and increase the diagnostic accuracy. In this paper, we present CapsuleNet, a deep learning model developed for the Capsule Vision 2024 Challenge, aimed at classifying 10 distinct GI abnormalities. Using a highly imbalanced dataset, we implemented various data augmentation strategies, reducing the data imbalance to a manageable level. Our model leverages a pretrained EfficientNet-b7 backbone, tuned with additional layers for classification and optimized with PReLU activation functions. The model demonstrated superior performance on validation data, achieving a micro accuracy of 84.5% and outperforming the VGG16 baseline across most classes. Despite these advances, challenges remain in classifying certain abnormalities, such as Erythema. Our findings suggest that CNN-based models like CapsuleNet can provide an efficient solution for GI tract disease classification, particularly when inference time is a critical factor. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19151v1-abstract-full').style.display = 'none'; document.getElementById('2410.19151v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Capsule Vision 2024 Challenge</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18751">arXiv:2410.18751</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18751">pdf</a>, <a href="https://arxiv.org/ps/2410.18751">ps</a>, <a href="https://arxiv.org/format/2410.18751">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Trading and Market Microstructure">q-fin.TR</span> </div> </div> <p class="title is-5 mathjax"> Double Auctions: Formalization and Automated Checkers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Garg%2C+M">Mohit Garg</a>, <a href="/search/cs?searchtype=author&amp;query=Raja%2C+N">N. 
Raja</a>, <a href="/search/cs?searchtype=author&amp;query=Sarswat%2C+S">Suneel Sarswat</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Abhishek Kr Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18751v1-abstract-short" style="display: inline;"> Double auctions are widely used in financial markets, such as those for stocks, derivatives, currencies, and commodities, to match demand and supply. Once all buyers and sellers have placed their trade requests, the exchange determines how these requests are to be matched. The two most common objectives for determining the matching are maximizing trade volume at a uniform price and maximizing trad&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18751v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18751v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18751v1-abstract-full" style="display: none;"> Double auctions are widely used in financial markets, such as those for stocks, derivatives, currencies, and commodities, to match demand and supply. Once all buyers and sellers have placed their trade requests, the exchange determines how these requests are to be matched. The two most common objectives for determining the matching are maximizing trade volume at a uniform price and maximizing trade volume through dynamic pricing. Prior research has primarily focused on single-quantity trade requests. In this work, we extend the framework to handle multiple-quantity trade requests and present fully formalized matching algorithms for double auctions, along with their correctness proofs. 
We establish new uniqueness theorems, enabling automatic detection of violations in exchange systems by comparing their output to that of a verified program. All proofs are formalized in the Coq Proof Assistant, and we extract verified OCaml and Haskell programs that could serve as a resource for exchanges and market regulators. We demonstrate the practical applicability of our work by running the verified program on real market data from an exchange to automatically check for violations in the exchange algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18751v1-abstract-full').style.display = 'none'; document.getElementById('2410.18751v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, Preliminary version of this work was published in ITP 2021</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.3.1; K.4.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18494">arXiv:2410.18494</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18494">pdf</a>, <a href="https://arxiv.org/format/2410.18494">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> 
</div> </div> <p class="title is-5 mathjax"> Assured Automatic Programming via Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mirchev%2C+M">Martin Mirchev</a>, <a href="/search/cs?searchtype=author&amp;query=Costea%2C+A">Andreea Costea</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Abhishek Kr Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Roychoudhury%2C+A">Abhik Roychoudhury</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18494v2-abstract-short" style="display: inline;"> With the advent of AI-based coding engines, it is possible to convert natural language requirements to executable code in standard programming languages. However, AI-generated code can be unreliable, and the natural language requirements driving this code may be ambiguous. In other words, the intent may not be accurately captured in the code generated from AI-coding engines like Copilot. The goal&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18494v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18494v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18494v2-abstract-full" style="display: none;"> With the advent of AI-based coding engines, it is possible to convert natural language requirements to executable code in standard programming languages. However, AI-generated code can be unreliable, and the natural language requirements driving this code may be ambiguous. In other words, the intent may not be accurately captured in the code generated from AI-coding engines like Copilot. 
The goal of our work is to discover the programmer intent, while generating code which conforms to the intent and a proof of this conformance. Our approach to intent discovery is powered by a novel repair engine called program-proof co-evolution, where the object of repair is a tuple (code, logical specification, test) generated by an LLM from the same natural language description. The program and the specification capture the initial operational and declarative description of intent, while the test represents a concrete, albeit partial, understanding of the intent. Our objective is to achieve consistency between the program, the specification, and the test by incrementally refining our understanding of the user intent. Reaching consistency through this repair process provides us with a formal, logical description of the intent, which is then translated back into natural language for the developer&#39;s inspection. The resultant intent description is now unambiguous, though expressed in natural language. We demonstrate how the unambiguous intent discovered through our approach increases the percentage of verifiable auto-generated programs on a recently proposed dataset in the Dafny programming language. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18494v2-abstract-full').style.display = 'none'; document.getElementById('2410.18494v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17351">arXiv:2410.17351</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17351">pdf</a>, <a href="https://arxiv.org/format/2410.17351">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Multi-agent Reinforcement Learning for Cyber Network Defense </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+V">Aditya Vikram Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Rathbun%2C+E">Ethan Rathbun</a>, <a href="/search/cs?searchtype=author&amp;query=Graham%2C+E">Emma Graham</a>, <a href="/search/cs?searchtype=author&amp;query=Oakley%2C+L">Lisa Oakley</a>, <a href="/search/cs?searchtype=author&amp;query=Boboila%2C+S">Simona Boboila</a>, <a href="/search/cs?searchtype=author&amp;query=Oprea%2C+A">Alina Oprea</a>, <a href="/search/cs?searchtype=author&amp;query=Chin%2C+P">Peter Chin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17351v2-abstract-short" style="display: inline;"> Recent advances in multi-agent reinforcement learning (MARL) have created opportunities to solve complex real-world tasks. Cybersecurity is a notable application area, where defending networks against sophisticated adversaries remains a challenging task typically performed by teams of security operators. 
In this work, we explore novel MARL strategies for building autonomous cyber network defenses&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17351v2-abstract-full').style.display = 'inline'; document.getElementById('2410.17351v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17351v2-abstract-full" style="display: none;"> Recent advances in multi-agent reinforcement learning (MARL) have created opportunities to solve complex real-world tasks. Cybersecurity is a notable application area, where defending networks against sophisticated adversaries remains a challenging task typically performed by teams of security operators. In this work, we explore novel MARL strategies for building autonomous cyber network defenses that address challenges such as large policy spaces, partial observability, and stealthy, deceptive adversarial strategies. To facilitate efficient and generalized learning, we propose a hierarchical Proximal Policy Optimization (PPO) architecture that decomposes the cyber defense task into specific sub-tasks like network investigation and host recovery. Our approach involves training sub-policies for each sub-task using PPO enhanced with domain expertise. These sub-policies are then leveraged by a master defense policy that coordinates their selection to solve complex network defense tasks. Furthermore, the sub-policies can be fine-tuned and transferred with minimal cost to defend against shifts in adversarial behavior or changes in network settings. We conduct extensive experiments using CybORG Cage 4, the state-of-the-art MARL environment for cyber defense. 
Comparisons with multiple baselines across different adversaries show that our hierarchical learning approach achieves top performance in terms of convergence speed, episodic return, and several interpretable metrics relevant to cybersecurity, including the fraction of clean machines on the network, precision, and false positives on recoveries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17351v2-abstract-full').style.display = 'none'; document.getElementById('2410.17351v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 7 figures, AAMAS preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15321">arXiv:2410.15321</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15321">pdf</a>, <a href="https://arxiv.org/format/2410.15321">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Integrated Design and Control of a Robotic Arm on a Quadcopter for Enhanced Package Delivery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Animesh Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Hillyer%2C+J">Jason 
Hillyer</a>, <a href="/search/cs?searchtype=author&amp;query=Ariaei%2C+F">Fariba Ariaei</a>, <a href="/search/cs?searchtype=author&amp;query=Jula%2C+H">Hossein Jula</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15321v1-abstract-short" style="display: inline;"> This paper presents a comprehensive design process for the integration of a robotic arm into a quadcopter, emphasizing the physical modeling, system integration, and controller development. Utilizing SolidWorks for mechanical design and MATLAB Simscape for simulation and control, this study addresses the challenges encountered in integrating the robotic arm with the drone, encompassing both mechan&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15321v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15321v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15321v1-abstract-full" style="display: none;"> This paper presents a comprehensive design process for the integration of a robotic arm into a quadcopter, emphasizing the physical modeling, system integration, and controller development. Utilizing SolidWorks for mechanical design and MATLAB Simscape for simulation and control, this study addresses the challenges encountered in integrating the robotic arm with the drone, encompassing both mechanical and control aspects. Two types of controllers are developed and analyzed: a Proportional-Integral-Derivative (PID) controller and a Model Reference Adaptive Controller (MRAC). The design and tuning of these controllers are key components of this research, with the focus on their application in package delivery tasks. 
Extensive simulations demonstrate the performance of each controller, with PID controllers exhibiting superior trajectory tracking and lower Root Mean Square (RMS) errors under various payload conditions. The results underscore the efficacy of PID control for stable flight and precise maneuvering, while highlighting adaptability of MRAC to changing dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15321v1-abstract-full').style.display = 'none'; document.getElementById('2410.15321v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15262">arXiv:2410.15262</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.15262">pdf</a>, <a href="https://arxiv.org/format/2410.15262">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HyQE: Ranking Contexts with Hypothetical Query Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+W">Weichao Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiaxin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Hasson%2C+H">Hilaf Hasson</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Anu Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Wenchao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15262v1-abstract-short" style="display: inline;"> In retrieval-augmented systems, context ranking techniques are commonly employed to reorder the retrieved contexts based on their relevance to a user query. A standard approach is to measure this relevance through the similarity between contexts and queries in the embedding space. However, such similarity often fails to capture the relevance. Alternatively, large language models (LLMs) have been u&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15262v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15262v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15262v1-abstract-full" style="display: none;"> In retrieval-augmented systems, context ranking techniques are commonly employed to reorder the retrieved contexts based on their relevance to a user query. A standard approach is to measure this relevance through the similarity between contexts and queries in the embedding space. However, such similarity often fails to capture the relevance. Alternatively, large language models (LLMs) have been used for ranking contexts. However, they can encounter scalability issues when the number of candidate contexts grows and the context window sizes of the LLMs remain constrained. Additionally, these approaches require fine-tuning LLMs with domain-specific data. In this work, we introduce a scalable ranking framework that combines embedding similarity and LLM capabilities without requiring LLM fine-tuning. Our framework uses a pre-trained LLM to hypothesize the user query based on the retrieved contexts and ranks the context based on the similarity between the hypothesized queries and the user query. 
Our framework is efficient at inference time and is compatible with many other retrieval and ranking techniques. Experimental results show that our method improves the ranking performance across multiple benchmarks. The complete code and data are available at https://github.com/zwc662/hyqe <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15262v1-abstract-full').style.display = 'none'; document.getElementById('2410.15262v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12843">arXiv:2410.12843</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12843">pdf</a>, <a href="https://arxiv.org/format/2410.12843">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Exploring Prompt Engineering: A Systematic Review with SWOT Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Aditi Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Ehtesham%2C+A">Abul Ehtesham</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+G+K">Gaurav Kumar Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Chatta%2C+N+K">Nikhil Kumar Chatta</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+S">Saket Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Khoei%2C+T+T">Tala Talaei Khoei</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12843v1-abstract-short" style="display: inline;"> In this paper, we conduct a comprehensive SWOT analysis of prompt engineering techniques within the realm of Large Language Models (LLMs). Emphasizing linguistic principles, we examine various techniques to identify their strengths, weaknesses, opportunities, and threats. Our findings provide insights into enhancing AI interactions and improving language model comprehension of human prompts. The a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12843v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12843v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12843v1-abstract-full" style="display: none;"> In this paper, we conduct a comprehensive SWOT analysis of prompt engineering techniques within the realm of Large Language Models (LLMs). Emphasizing linguistic principles, we examine various techniques to identify their strengths, weaknesses, opportunities, and threats. Our findings provide insights into enhancing AI interactions and improving language model comprehension of human prompts. The analysis covers techniques including template-based approaches and fine-tuning, addressing the problems and challenges associated with each. The conclusion offers future research directions aimed at advancing the effectiveness of prompt engineering in optimizing human-machine communication. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12843v1-abstract-full').style.display = 'none'; document.getElementById('2410.12843v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11212">arXiv:2410.11212</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.11212">pdf</a>, <a href="https://arxiv.org/format/2410.11212">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Data-driven Design of Randomized Control Trials with Guaranteed Treatment Effects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cortes-Gomez%2C+S">Santiago Cortes-Gomez</a>, <a href="/search/cs?searchtype=author&amp;query=Raman%2C+N">Naveen Raman</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Aarti Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Wilder%2C+B">Bryan Wilder</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11212v1-abstract-short" style="display: inline;"> Randomized controlled trials (RCTs) can be used to generate guarantees on treatment effects. 
However, RCTs often spend unnecessary resources exploring sub-optimal treatments, which can reduce the power of treatment guarantees. To address these concerns, we develop a two-stage RCT where, first on a data-driven screening stage, we prune low-impact treatments, while in the second stage, we develop hi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11212v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11212v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11212v1-abstract-full" style="display: none;"> Randomized controlled trials (RCTs) can be used to generate guarantees on treatment effects. However, RCTs often spend unnecessary resources exploring sub-optimal treatments, which can reduce the power of treatment guarantees. To address these concerns, we develop a two-stage RCT where, first on a data-driven screening stage, we prune low-impact treatments, while in the second stage, we develop high probability lower bounds on the treatment effect. Unlike existing adaptive RCT frameworks, our method is simple enough to be implemented in scenarios with limited adaptivity. We derive optimal designs for two-stage RCTs and demonstrate how we can implement such designs through sample splitting. Empirically, we demonstrate that two-stage designs improve upon single-stage approaches, especially in scenarios where domain knowledge is available in the form of a prior. Our work is thus a simple, yet effective, method to estimate high-probability certificates for high-performing treatment effects in an RCT. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11212v1-abstract-full').style.display = 'none'; document.getElementById('2410.11212v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09339">arXiv:2410.09339</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09339">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Advanced Gesture Recognition in Autism: Integrating YOLOv7, Video Augmentation and VideoMAE for Video Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Amit Kumar Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Shrivastava%2C+T">Trapti Shrivastava</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+V">Vrijendra Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09339v1-abstract-short" style="display: inline;"> Deep learning and advancements in contactless sensors have significantly enhanced our ability to understand complex human activities in healthcare settings. 
In particular, deep learning models utilizing computer vision have been developed to enable detailed analysis of human gesture recognition, especially repetitive gestures which are commonly observed behaviors in children with autism. This rese&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09339v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09339v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09339v1-abstract-full" style="display: none;"> Deep learning and advancements in contactless sensors have significantly enhanced our ability to understand complex human activities in healthcare settings. In particular, deep learning models utilizing computer vision have been developed to enable detailed analysis of human gesture recognition, especially repetitive gestures which are commonly observed behaviors in children with autism. This research work aims to identify repetitive behaviors indicative of autism by analyzing videos captured in natural settings as children engage in daily activities. The focus is on accurately categorizing real-time repetitive gestures such as spinning, head banging, and arm flapping. To this end, we utilize the publicly accessible Self-Stimulatory Behavior Dataset (SSBD) to classify these stereotypical movements. A key component of the proposed methodology is the use of \textbf{VideoMAE}, a model designed to improve both spatial and temporal analysis of video data through a masking and reconstruction mechanism. This model significantly outperformed traditional methods, achieving an accuracy of 97.7\%, a 14.7\% improvement over the previous state-of-the-art. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09339v1-abstract-full').style.display = 'none'; document.getElementById('2410.09339v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07393">arXiv:2410.07393</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.07393">pdf</a>, <a href="https://arxiv.org/format/2410.07393">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> How Much Power Must We Extract From a Receiver Antenna to Effect Communications? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Marzetta%2C+T+L">Thomas L. Marzetta</a>, <a href="/search/cs?searchtype=author&amp;query=McMinn%2C+B">Brian McMinn</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Amritpal Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Hansen%2C+T+B">Thorkild B. Hansen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07393v1-abstract-short" style="display: inline;"> Subject to the laws of classical physics - the science that governs the design of today&#39;s wireless communication systems - there is no need to extract power from a receiver antenna in order to effect communications. 
If we dispense with a transmission line and, instead, make the front-end electronics colocated with the antenna, then a high input-impedance preamplifier can measure the open-circuit v&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07393v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07393v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07393v1-abstract-full" style="display: none;"> Subject to the laws of classical physics - the science that governs the design of today&#39;s wireless communication systems - there is no need to extract power from a receiver antenna in order to effect communications. If we dispense with a transmission line and, instead, make the front-end electronics colocated with the antenna, then a high input-impedance preamplifier can measure the open-circuit voltage directly on the antenna port without drawing either current or power. Neither Friis&#39; concept of noise figure, nor Shannon information theory, nor electronics technology dictates that we must extract power from an antenna. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07393v1-abstract-full').style.display = 'none'; document.getElementById('2410.07393v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05928">arXiv:2410.05928</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05928">pdf</a>, <a href="https://arxiv.org/format/2410.05928">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Beyond Captioning: Task-Specific Prompting for Improved VLM Performance in Mathematical Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ayush Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+M">Mansi Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Garg%2C+S">Shivank Garg</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+A">Abhinav Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+V">Vansh Agrawal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05928v1-abstract-short" style="display: inline;"> Vision-Language Models (VLMs) have transformed tasks requiring visual and reasoning abilities, such as image retrieval and Visual Question Answering (VQA). Despite their success, VLMs face significant challenges with tasks involving geometric reasoning, algebraic problem-solving, and counting. 
These limitations stem from difficulties effectively integrating multiple modalities and accurately inter&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05928v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05928v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05928v1-abstract-full" style="display: none;"> Vision-Language Models (VLMs) have transformed tasks requiring visual and reasoning abilities, such as image retrieval and Visual Question Answering (VQA). Despite their success, VLMs face significant challenges with tasks involving geometric reasoning, algebraic problem-solving, and counting. These limitations stem from difficulties effectively integrating multiple modalities and accurately interpreting geometry-related tasks. Various works claim that introducing a captioning pipeline before VQA tasks enhances performance. We incorporated this pipeline for tasks involving geometry, algebra, and counting. We found that captioning results are not generalizable, specifically with larger VLMs primarily trained on downstream QnA tasks showing random performance on math-related challenges. However, we present a promising alternative: task-based prompting, enriching the prompt with task-specific guidance. This approach shows promise and proves more effective than direct captioning methods for math-heavy problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05928v1-abstract-full').style.display = 'none'; document.getElementById('2410.05928v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05326">arXiv:2410.05326</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05326">pdf</a>, <a href="https://arxiv.org/format/2410.05326">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Early-Cycle Internal Impedance Enables ML-Based Battery Cycle Life Predictions Across Manufacturers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sours%2C+T">Tyler Sours</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+S">Shivang Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Cormier%2C+M">Marc Cormier</a>, <a href="/search/cs?searchtype=author&amp;query=Crivelli-Decker%2C+J">Jordan Crivelli-Decker</a>, <a href="/search/cs?searchtype=author&amp;query=Ridderbusch%2C+S">Steffen Ridderbusch</a>, <a href="/search/cs?searchtype=author&amp;query=Glazier%2C+S+L">Stephen L. Glazier</a>, <a href="/search/cs?searchtype=author&amp;query=Aiken%2C+C+P">Connor P. Aiken</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+R">Aayush R. 
Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+A">Ang Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Allam%2C+O">Omar Allam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05326v1-abstract-short" style="display: inline;"> Predicting the end-of-life (EOL) of lithium-ion batteries across different manufacturers presents significant challenges due to variations in electrode materials, manufacturing processes, cell formats, and a lack of generally available data. Methods that construct features solely on voltage-capacity profile data typically fail to generalize across cell chemistries. This study introduces a methodol&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05326v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05326v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05326v1-abstract-full" style="display: none;"> Predicting the end-of-life (EOL) of lithium-ion batteries across different manufacturers presents significant challenges due to variations in electrode materials, manufacturing processes, cell formats, and a lack of generally available data. Methods that construct features solely on voltage-capacity profile data typically fail to generalize across cell chemistries. This study introduces a methodology that combines traditional voltage-capacity features with Direct Current Internal Resistance (DCIR) measurements, enabling more accurate and generalizable EOL predictions. The use of early-cycle DCIR data captures critical degradation mechanisms related to internal resistance growth, enhancing model robustness. 
Models are shown to successfully predict the number of cycles to EOL for unseen manufacturers of varied electrode composition with a mean absolute error (MAE) of 150 cycles. This cross-manufacturer generalizability reduces the need for extensive new data collection and retraining, enabling manufacturers to optimize new battery designs using existing datasets. Additionally, a novel DCIR-compatible dataset is released as part of ongoing efforts to enrich the growing ecosystem of cycling data and accelerate battery materials development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05326v1-abstract-full').style.display = 'none'; document.getElementById('2410.05326v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05274">arXiv:2410.05274</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.05274">pdf</a>, <a href="https://arxiv.org/format/2410.05274">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Scale-Invariant Object Detection by Adaptive Convolution with Unified Global-Local Context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Amrita Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Mukherjee%2C+S">Snehasis Mukherjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05274v1-abstract-short" style="display: inline;"> Dense features are important for detecting minute objects in images. Unfortunately, despite the remarkable efficacy of the CNN models in multi-scale object detection, CNN models often fail to detect smaller objects in images due to the loss of dense features during the pooling process. Atrous convolution addresses this issue by applying sparse kernels. 
However, sparse kernels often can lose the mu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05274v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05274v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05274v1-abstract-full" style="display: none;"> Dense features are important for detecting minute objects in images. Unfortunately, despite the remarkable efficacy of the CNN models in multi-scale object detection, CNN models often fail to detect smaller objects in images due to the loss of dense features during the pooling process. Atrous convolution addresses this issue by applying sparse kernels. However, sparse kernels often can lose the multi-scale detection efficacy of the CNN model. In this paper, we propose an object detection model using a Switchable (adaptive) Atrous Convolutional Network (SAC-Net) based on the efficientDet model. A fixed atrous rate limits the performance of the CNN models in the convolutional layers. To overcome this limitation, we introduce a switchable mechanism that allows for dynamically adjusting the atrous rate during the forward pass. The proposed SAC-Net encapsulates the benefits of both low-level and high-level features to achieve improved performance on multi-scale object detection tasks, without losing the dense features. Further, we apply a depth-wise switchable atrous rate to the proposed network, to improve the scale-invariant features. Finally, we apply global context on the proposed model. Our extensive experiments on benchmark datasets demonstrate that the proposed SAC-Net outperforms the state-of-the-art models by a significant margin in terms of accuracy. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05274v1-abstract-full').style.display = 'none'; document.getElementById('2410.05274v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03621">arXiv:2410.03621</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03621">pdf</a>, <a href="https://arxiv.org/format/2410.03621">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCE.2024.3373912">10.1109/TCE.2024.3373912 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Global Medical Data Security and Privacy Preserving Standards Identification Framework for Electronic Healthcare Consumers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+V">Vinaytosh Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+K">Kishu Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+D">Deepika Saxena</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Ashutosh Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2410.03621v1-abstract-short" style="display: inline;"> Electronic Health Records (EHR) are crucial for the success of digital healthcare, with a focus on putting consumers at the center of this transformation. However, the digitalization of healthcare records brings along security and privacy risks for personal data. The major concern is that different countries have varying standards for the security and privacy of medical data. This paper proposed a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03621v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03621v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03621v1-abstract-full" style="display: none;"> Electronic Health Records (EHR) are crucial for the success of digital healthcare, with a focus on putting consumers at the center of this transformation. However, the digitalization of healthcare records brings along security and privacy risks for personal data. The major concern is that different countries have varying standards for the security and privacy of medical data. This paper proposed a novel and comprehensive framework to standardize these rules globally, bringing them together on a common platform. To support this proposal, the study reviews existing literature to understand the research interest in this issue. It also examines six key laws and standards related to security and privacy, identifying twenty concepts. The proposed framework utilized K-means clustering to categorize these concepts and identify five key factors. Finally, an Ordinal Priority Approach is applied to determine the preferred implementation of these factors in the context of EHRs. The proposed study provides a descriptive then prescriptive framework for the implementation of privacy and security in the context of electronic health records. 
Therefore, the findings of the proposed framework are useful for professionals and policymakers in improving the security and privacy associated with EHRs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03621v1-abstract-full').style.display = 'none'; document.getElementById('2410.03621v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> A Global Medical Data Security and Privacy Preserving Standards Identification Framework for Electronic Healthcare Consumers, in IEEE Transactions on Consumer Electronics, vol. 70, no. 1, pp. 4379-4387, Feb. 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03217">arXiv:2410.03217</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03217">pdf</a>, <a href="https://arxiv.org/format/2410.03217">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TASE.2024.3456209">10.1109/TASE.2024.3456209 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An Intelligent Quantum Cyber-Security Framework for Healthcare Data Management </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+K">Kishu 
Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+D">Deepika Saxena</a>, <a href="/search/cs?searchtype=author&amp;query=Rani%2C+P">Pooja Rani</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+J">Jitendra Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Makkar%2C+A">Aaisha Makkar</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Ashutosh Kumar Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+C">Chung-Nan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03217v1-abstract-short" style="display: inline;"> Digital healthcare is essential to facilitate consumers to access and disseminate their medical data easily for enhanced medical care services. However, the significant concern with digitalization across healthcare systems necessitates for a prompt, productive, and secure storage facility along with a vigorous communication strategy, to stimulate sensitive digital healthcare data sharing and proac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03217v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03217v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03217v1-abstract-full" style="display: none;"> Digital healthcare is essential to facilitate consumers to access and disseminate their medical data easily for enhanced medical care services. However, the significant concern with digitalization across healthcare systems necessitates for a prompt, productive, and secure storage facility along with a vigorous communication strategy, to stimulate sensitive digital healthcare data sharing and proactive estimation of malicious entities. 
In this context, this paper introduces a comprehensive quantum-based framework to overcome the potential security and privacy issues for secure healthcare data management. It equips quantum encryption for the secured storage and dispersal of healthcare data over the shared cloud platform. Also, the framework furnishes a quantum feed-forward neural network unit to examine the intention behind the data request before granting access, for proactive estimation of potential data breach. In this way, the proposed framework delivers overall healthcare data management by coupling the advanced and more competent quantum approach with machine learning to safeguard the data storage, access, and prediction of malicious entities in an automated manner. Thus, the proposed IQ-HDM leads to more cooperative and effective healthcare delivery and empowers individuals with adequate custody of their health data. The experimental evaluation and comparison of the proposed IQ-HDM framework with state-of-the-art methods outline a considerable improvement up to 67.6%, in tackling cyber threats related to healthcare data security. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03217v1-abstract-full').style.display = 'none'; document.getElementById('2410.03217v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Automation Science and Engineering (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02725">arXiv:2410.02725</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02725">pdf</a>, <a href="https://arxiv.org/format/2410.02725">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Inference-Time Compute: LLMs Can Predict if They Can Do Better, Even Mid-Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Manvi%2C+R">Rohin Manvi</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Anikait Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Ermon%2C+S">Stefano Ermon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02725v1-abstract-short" style="display: inline;"> Inference-time computation is a powerful paradigm to enhance the performance of large language models (LLMs), with Best-of-N sampling being a widely used technique. However, this method is computationally expensive, requiring both (1) an external reward model and (2) the generation of multiple samples. 
In this work, we introduce a new generative self-evaluation scheme designed to adaptively reduce&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02725v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02725v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02725v1-abstract-full" style="display: none;"> Inference-time computation is a powerful paradigm to enhance the performance of large language models (LLMs), with Best-of-N sampling being a widely used technique. However, this method is computationally expensive, requiring both (1) an external reward model and (2) the generation of multiple samples. In this work, we introduce a new generative self-evaluation scheme designed to adaptively reduce the number of generated samples while maintaining or even improving performance. We use a generative reward model formulation, allowing the LLM to predict mid-generation the probability that restarting the generation will yield a better response. These predictions are obtained without an external reward model and can be used to decide whether or not to generate more samples, prune unpromising samples early on, or to pick the best sample. This capability is very inexpensive as it involves generating a single predefined token. Trained using a dataset constructed with real unfiltered LMSYS user prompts, Llama 3.1 8B&#39;s win rate against GPT-4 on AlpacaEval increases from 21% to 34% with 16 samples and math performance on GSM8K improves from 84% to 91%. By sampling only when the LLM determines that it is beneficial to do so and adaptively adjusting temperature annealing, we demonstrate that 74% of the improvement from using 16 samples can be achieved with only 1.2 samples on average. We further demonstrate that 50-75% of samples can be pruned early in generation with minimal degradation in performance. 
Overall, our methods enable more efficient and scalable compute utilization during inference for LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02725v1-abstract-full').style.display = 'none'; document.getElementById('2410.02725v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19518">arXiv:2409.19518</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19518">pdf</a>, <a href="https://arxiv.org/format/2409.19518">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KODA: A Data-Driven Recursive Model for Time Series Forecasting and Data Assimilation using Koopman Operators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ashutosh Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ashish Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Imbiriba%2C+T">Tales Imbiriba</a>, <a href="/search/cs?searchtype=author&amp;query=Erdogmus%2C+D">Deniz Erdogmus</a>, <a href="/search/cs?searchtype=author&amp;query=Borsoi%2C+R">Ricardo Borsoi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19518v1-abstract-short" style="display: inline;"> Approaches based on Koopman 
operators have shown great promise in forecasting time series data generated by complex nonlinear dynamical systems (NLDS). Although such approaches are able to capture the latent state representation of a NLDS, they still face difficulty in long term forecasting when applied to real world data. Specifically many real-world NLDS exhibit time-varying behavior, leading to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19518v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19518v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19518v1-abstract-full" style="display: none;"> Approaches based on Koopman operators have shown great promise in forecasting time series data generated by complex nonlinear dynamical systems (NLDS). Although such approaches are able to capture the latent state representation of a NLDS, they still face difficulty in long term forecasting when applied to real world data. Specifically many real-world NLDS exhibit time-varying behavior, leading to nonstationarity that is hard to capture with such models. Furthermore they lack a systematic data-driven approach to perform data assimilation, that is, exploiting noisy measurements on the fly in the forecasting task. To alleviate the above issues, we propose a Koopman operator-based approach (named KODA - Koopman Operator with Data Assimilation) that integrates forecasting and data assimilation in NLDS. In particular we use a Fourier domain filter to disentangle the data into a physical component whose dynamics can be accurately represented by a Koopman operator, and residual dynamics that represents the local or time varying behavior that are captured by a flexible and learnable recursive model. We carefully design an architecture and training criterion that ensures this decomposition lead to stable and long-term forecasts. 
Moreover, we introduce a course correction strategy to perform data assimilation with new measurements at inference time. The proposed approach is completely data-driven and can be learned end-to-end. Through extensive experimental comparisons, we show that KODA outperforms existing state-of-the-art methods on multiple time series benchmarks such as electricity, temperature, weather, Lorenz 63 and Duffing oscillator, demonstrating its superior performance and efficacy across the three tasks: a) forecasting, b) data assimilation and c) state prediction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19518v1-abstract-full').style.display = 'none'; document.getElementById('2409.19518v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19425">arXiv:2409.19425</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19425">pdf</a>, <a href="https://arxiv.org/format/2409.19425">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> From Unimodal to Multimodal: Scaling up Projectors to Align Modalities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Maniparambil%2C+M">Mayug Maniparambil</a>, <a href="/search/cs?searchtype=author&amp;query=Akshulakov%2C+R">Raiymbek Akshulakov</a>, <a href="/search/cs?searchtype=author&amp;query=Djilali%2C+Y+A+D">Yasser Abdelaziz Dahou Djilali</a>, <a href="/search/cs?searchtype=author&amp;query=Narayan%2C+S">Sanath Narayan</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Ankit Singh</a>, <a href="/search/cs?searchtype=author&amp;query=O%27Connor%2C+N+E">Noel E. O&#39;Connor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19425v1-abstract-short" style="display: inline;"> Recent contrastive multimodal vision-language models like CLIP have demonstrated robust open-world semantic understanding, becoming the standard image backbones for vision-language applications due to their aligned latent space. 
However, this practice has left powerful unimodal encoders for both vision and language underutilized in multimodal applications which raises a key question: Is there a pl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19425v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19425v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19425v1-abstract-full" style="display: none;"> Recent contrastive multimodal vision-language models like CLIP have demonstrated robust open-world semantic understanding, becoming the standard image backbones for vision-language applications due to their aligned latent space. However, this practice has left powerful unimodal encoders for both vision and language underutilized in multimodal applications which raises a key question: Is there a plausible way to connect unimodal backbones for zero-shot vision-language tasks? To this end, we propose a novel approach that aligns vision and language modalities using only projection layers on pretrained, frozen unimodal encoders. Our method exploits the high semantic similarity between embedding spaces of well-trained vision and language models. It involves selecting semantically similar encoders in the latent space, curating a concept-rich dataset of image-caption pairs, and training simple MLP projectors. We evaluated our approach on 12 zero-shot classification datasets and 2 image-text retrieval datasets. Our best model, utilizing DINOv2 and All-Roberta-Large text encoder, achieves 76\(\%\) accuracy on ImageNet with a 20-fold reduction in data and 65 fold reduction in compute requirements. The proposed framework enhances the accessibility of model development while enabling flexible adaptation across diverse scenarios, offering an efficient approach to building multimodal models by utilizing existing unimodal architectures. 
Code and datasets will be released soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19425v1-abstract-full').style.display = 'none'; document.getElementById('2409.19425v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint, 10 pages; First two authors contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19015">arXiv:2409.19015</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19015">pdf</a>, <a href="https://arxiv.org/format/2409.19015">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Textless NLP -- Zero Resource Challenge with Low Resource Compute </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ramadass%2C+K">Krithiga Ramadass</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+P">Abrit Pal Singh</a>, <a 
href="/search/cs?searchtype=author&amp;query=J%2C+S">Srihari J</a>, <a href="/search/cs?searchtype=author&amp;query=Kalyani%2C+S">Sheetal Kalyani</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19015v1-abstract-short" style="display: inline;"> This work addresses the persistent challenges of substantial training time and GPU resource requirements even when training lightweight encoder-vocoder models for Textless NLP. We reduce training steps significantly while improving performance by a) leveraging learning rate schedulers for efficient and faster convergence b) optimizing hop length and c) tuning the interpolation scale factors for be&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19015v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19015v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19015v1-abstract-full" style="display: none;"> This work addresses the persistent challenges of substantial training time and GPU resource requirements even when training lightweight encoder-vocoder models for Textless NLP. We reduce training steps significantly while improving performance by a) leveraging learning rate schedulers for efficient and faster convergence b) optimizing hop length and c) tuning the interpolation scale factors for better audio quality. Additionally, we explore the latent space representation for Indian languages such as Tamil and Bengali for the acoustic unit discovery and voice conversion task. Our approach leverages a quantized encoder architecture, in conjunction with a vocoder which utilizes the proposed mixture of optimized hop length, tuned interpolation scale factors and a cyclic learning rate scheduler. 
We obtain consistently good results across English, Tamil and Bengali datasets. The proposed method excels in capturing complex linguistic patterns, resulting in clear reconstructed audio during voice conversion with significantly reduced training time. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19015v1-abstract-full').style.display = 'none'; document.getElementById('2409.19015v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17460">arXiv:2409.17460</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.17460">pdf</a>, <a href="https://arxiv.org/format/2409.17460">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Towards More Relevant Product Search Ranking Via Large Language Models: An Empirical Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Atul Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jingbo Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Mu%2C+C">Cun Mu</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Z">Zheng Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17460v1-abstract-short" style="display: inline;"> Training Learning-to-Rank models for 
e-commerce product search ranking can be challenging due to the lack of a gold standard of ranking relevance. In this paper, we decompose ranking relevance into content-based and engagement-based aspects, and we propose to leverage Large Language Models (LLMs) for both label and feature generation in model training, primarily aiming to improve the model&#39;s predi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17460v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17460v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17460v1-abstract-full" style="display: none;"> Training Learning-to-Rank models for e-commerce product search ranking can be challenging due to the lack of a gold standard of ranking relevance. In this paper, we decompose ranking relevance into content-based and engagement-based aspects, and we propose to leverage Large Language Models (LLMs) for both label and feature generation in model training, primarily aiming to improve the model&#39;s predictive capability for content-based relevance. Additionally, we introduce different sigmoid transformations on the LLM outputs to polarize relevance scores in labeling, enhancing the model&#39;s ability to balance content-based and engagement-based relevances and thus prioritize highly relevant items overall. Comprehensive online tests and offline evaluations are also conducted for the proposed design. Our work sheds light on advanced strategies for integrating LLMs into e-commerce product search ranking model training, offering a pathway to more effective and balanced models with improved ranking relevance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17460v1-abstract-full').style.display = 'none'; document.getElementById('2409.17460v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in CIKM 2024 GenAIECommerce Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17456">arXiv:2409.17456</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.17456">pdf</a>, <a href="https://arxiv.org/format/2409.17456">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Long or Short or Both? 
An Exploration on Lookback Time Windows of Behavioral Features in Product Search Ranking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Atul Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jingbo Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Mu%2C+C">Cun Mu</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Z">Zheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Pedersen%2C+J">Jan Pedersen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17456v1-abstract-short" style="display: inline;"> Customer shopping behavioral features are core to product search ranking models in eCommerce. In this paper, we investigate the effect of lookback time windows when aggregating these features at the (query, product) level over history. By studying the pros and cons of using long and short time windows, we propose a novel approach to integrating these historical behavioral features of different tim&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17456v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17456v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17456v1-abstract-full" style="display: none;"> Customer shopping behavioral features are core to product search ranking models in eCommerce. In this paper, we investigate the effect of lookback time windows when aggregating these features at the (query, product) level over history. By studying the pros and cons of using long and short time windows, we propose a novel approach to integrating these historical behavioral features of different time windows. 
In particular, we address the criticality of using query-level vertical signals in ranking models to effectively aggregate all information from different behavioral features. Anecdotal evidence for the proposed approach is also provided using live product search traffic on Walmart.com. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17456v1-abstract-full').style.display = 'none'; document.getElementById('2409.17456v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in ACM SIGIR Workshop on eCommerce 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17141">arXiv:2409.17141</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.17141">pdf</a>, <a href="https://arxiv.org/format/2409.17141">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FineZip : Pushing the Limits of Large Language Models for Practical Lossless Text Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mittu%2C+F">Fazal Mittu</a>, <a href="/search/cs?searchtype=author&amp;query=Bu%2C+Y">Yihuan Bu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gupta%2C+A">Akshat Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Devireddy%2C+A">Ashok Devireddy</a>, <a href="/search/cs?searchtype=author&amp;query=Ozdarendeli%2C+A+E">Alp Eren Ozdarendeli</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Anant Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Anumanchipalli%2C+G">Gopala Anumanchipalli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17141v1-abstract-short" style="display: inline;"> While the language modeling objective has been shown to be deeply connected with compression, it is surprising that modern LLMs are not employed in practical text compression systems. In this paper, we provide an in-depth analysis of neural network and transformer-based compression techniques to answer this question. We compare traditional text compression systems with neural network and LLM-based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17141v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17141v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17141v1-abstract-full" style="display: none;"> While the language modeling objective has been shown to be deeply connected with compression, it is surprising that modern LLMs are not employed in practical text compression systems. In this paper, we provide an in-depth analysis of neural network and transformer-based compression techniques to answer this question. We compare traditional text compression systems with neural network and LLM-based text compression methods. Although LLM-based systems significantly outperform conventional compression methods, they are highly impractical. 
Specifically, LLMZip, a recent text compression system using Llama3-8B requires 9.5 days to compress just 10 MB of text, although with huge improvements in compression ratios. To overcome this, we present FineZip - a novel LLM-based text compression system that combines ideas of online memorization and dynamic context to reduce the compression time immensely. FineZip can compress the above corpus in approximately 4 hours compared to 9.5 days, a 54 times improvement over LLMZip and comparable performance. FineZip outperforms traditional algorithmic compression methods with a large margin, improving compression ratios by approximately 50\%. With this work, we take the first step towards making lossless text compression with LLMs a reality. While FineZip presents a significant step in that direction, LLMs are still not a viable solution for large-scale text compression. We hope our work paves the way for future research and innovation to solve this problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17141v1-abstract-full').style.display = 'none'; document.getElementById('2409.17141v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16126">arXiv:2409.16126</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16126">pdf</a>, <a href="https://arxiv.org/format/2409.16126">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VisioPhysioENet: Multimodal Engagement Detection using Visual and Physiological Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Alakhsimar Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Verma%2C+N">Nischay Verma</a>, <a href="/search/cs?searchtype=author&amp;query=Goyal%2C+K">Kanav Goyal</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Amritpal Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+P">Puneet Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiaobai Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16126v1-abstract-short" style="display: inline;"> This paper presents VisioPhysioENet, a novel multimodal system that leverages visual cues and physiological signals to detect learner engagement. It employs a two-level approach for visual feature extraction using the Dlib library for facial landmark extraction and the OpenCV library for further estimations. 
This is complemented by extracting physiological signals using the plane-orthogonal-to-ski&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16126v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16126v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16126v1-abstract-full" style="display: none;"> This paper presents VisioPhysioENet, a novel multimodal system that leverages visual cues and physiological signals to detect learner engagement. It employs a two-level approach for visual feature extraction using the Dlib library for facial landmark extraction and the OpenCV library for further estimations. This is complemented by extracting physiological signals using the plane-orthogonal-to-skin method to assess cardiovascular activity. These features are integrated using advanced machine learning classifiers, enhancing the detection of various engagement levels. We rigorously evaluate VisioPhysioENet on the DAiSEE dataset, where it achieves an accuracy of 63.09%, demonstrating a superior ability to discern various levels of engagement compared to existing methodologies. The proposed system&#39;s code can be accessed at https://github.com/MIntelligence-Group/VisioPhysioENet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16126v1-abstract-full').style.display = 'none'; document.getElementById('2409.16126v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 Pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16011">arXiv:2409.16011</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16011">pdf</a>, <a href="https://arxiv.org/format/2409.16011">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> CrowdSurfer: Sampling Optimization Augmented with Vector-Quantized Variational AutoEncoder for Dense Crowd Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+N">Naman Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Singha%2C+A">Antareep Singha</a>, <a href="/search/cs?searchtype=author&amp;query=Nanwani%2C+L">Laksh Nanwani</a>, <a href="/search/cs?searchtype=author&amp;query=Potdar%2C+D">Dhruv Potdar</a>, <a href="/search/cs?searchtype=author&amp;query=R%2C+T">Tarun R</a>, <a href="/search/cs?searchtype=author&amp;query=Rastgar%2C+F">Fatemeh Rastgar</a>, <a href="/search/cs?searchtype=author&amp;query=Idoko%2C+S">Simon Idoko</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Krishna%2C+K+M">K. Madhava Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16011v1-abstract-short" style="display: inline;"> Navigation amongst densely packed crowds remains a challenge for mobile robots. 
The complexity increases further if the environment layout changes, making the prior computed global plan infeasible. In this paper, we show that it is possible to dramatically enhance crowd navigation by just improving the local planner. Our approach combines generative modelling with inference time optimization to ge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16011v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16011v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16011v1-abstract-full" style="display: none;"> Navigation amongst densely packed crowds remains a challenge for mobile robots. The complexity increases further if the environment layout changes, making the prior computed global plan infeasible. In this paper, we show that it is possible to dramatically enhance crowd navigation by just improving the local planner. Our approach combines generative modelling with inference time optimization to generate sophisticated long-horizon local plans at interactive rates. More specifically, we train a Vector Quantized Variational AutoEncoder to learn a prior over the expert trajectory distribution conditioned on the perception input. At run-time, this is used as an initialization for a sampling-based optimizer for further refinement. Our approach does not require any sophisticated prediction of dynamic obstacles and yet provides state-of-the-art performance. In particular, we compare against the recent DRL-VO approach and show a 40% improvement in success rate and a 6% improvement in travel time. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16011v1-abstract-full').style.display = 'none'; document.getElementById('2409.16011v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14341">arXiv:2409.14341</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.14341">pdf</a>, <a href="https://arxiv.org/format/2409.14341">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> VERCEL: Verification and Rectification of Configuration Errors with Least Squares </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Abhiram Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Sharma%2C+S">Sidharth Sharma</a>, <a href="/search/cs?searchtype=author&amp;query=Gumaste%2C+A">Ashwin Gumaste</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14341v1-abstract-short" style="display: inline;"> We present Vercel, a network verification and automatic fault rectification tool that is based on a computationally tractable, algorithmically expressive, and mathematically aesthetic domain of linear algebra. 
Vercel works on abstracting out packet headers into standard basis vectors that are used to create a port-specific forwarding matrix $\mathcal{A}$, representing a set of packet headers/prefi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14341v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14341v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14341v1-abstract-full" style="display: none;"> We present Vercel, a network verification and automatic fault rectification tool that is based on a computationally tractable, algorithmically expressive, and mathematically aesthetic domain of linear algebra. Vercel works on abstracting out packet headers into standard basis vectors that are used to create a port-specific forwarding matrix $\mathcal{A}$, representing a set of packet headers/prefixes that a router forwards along a port. By equating this matrix $\mathcal{A}$ and a vector $b$ (that represents the set of all headers under consideration), we are able to apply \textit{least squares} (which produces a column rank agnostic solution) to compute which headers are reachable at the destination. Reachability now simply means evaluating if vector $b$ is in the column space of $\mathcal{A}$, which can efficiently be computed using least squares. Further, the use of vector representation and least squares opens new possibilities for understanding network behavior. For example, we are able to map rules, routing policies, what-if scenarios to the fundamental linear algebraic form, $\mathcal{A}x=b$, as well as determine how to configure forwarding tables appropriately. We show Vercel is faster than the state-of-art such as NetPlumber, Veriflow, APKeep, AP Verifier, when measured over diverse datasets. 
Vercel is almost as fast as Deltanet, when rules are verified in batches and provides better scalability, expressiveness and memory efficiency. A key highlight of Vercel is that while evaluating for reachability, the tool can incorporate intents, and transform these into auto-configurable table entries, implying a recommendation/correction system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14341v1-abstract-full').style.display = 'none'; document.getElementById('2409.14341v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13939">arXiv:2409.13939</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.13939">pdf</a>, <a href="https://arxiv.org/format/2409.13939">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Simple Unsupervised Knowledge Distillation With Space Similarity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Aditya Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Haohan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13939v1-abstract-short" style="display: inline;"> As per recent studies, Self-supervised learning 
(SSL) does not readily extend to smaller architectures. One direction to mitigate this shortcoming while simultaneously training a smaller network without labels is to adopt unsupervised knowledge distillation (UKD). Existing UKD approaches handcraft preservation worthy inter/intra sample relationships between the teacher and its student. However, th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13939v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13939v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13939v1-abstract-full" style="display: none;"> As per recent studies, Self-supervised learning (SSL) does not readily extend to smaller architectures. One direction to mitigate this shortcoming while simultaneously training a smaller network without labels is to adopt unsupervised knowledge distillation (UKD). Existing UKD approaches handcraft preservation worthy inter/intra sample relationships between the teacher and its student. However, this may overlook/ignore other key relationships present in the mapping of a teacher. In this paper, instead of heuristically constructing preservation worthy relationships between samples, we directly motivate the student to model the teacher&#39;s embedding manifold. If the mapped manifold is similar, all inter/intra sample relationships are indirectly conserved. We first demonstrate that prior methods cannot preserve teacher&#39;s latent manifold due to their sole reliance on $L_2$ normalised embedding features. Subsequently, we propose a simple objective to capture the lost information due to normalisation. Our proposed loss component, termed \textbf{space similarity}, motivates each dimension of a student&#39;s feature space to be similar to the corresponding dimension of its teacher. 
We perform extensive experiments demonstrating strong performance of our proposed approach on various benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13939v1-abstract-full').style.display = 'none'; document.getElementById('2409.13939v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12917">arXiv:2409.12917</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.12917">pdf</a>, <a href="https://arxiv.org/format/2409.12917">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Training Language Models to Self-Correct via Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+A">Aviral Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+V">Vincent Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+R">Rishabh Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Y">Yi Su</a>, <a href="/search/cs?searchtype=author&amp;query=Co-Reyes%2C+J+D">John D Co-Reyes</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Avi Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Baumli%2C+K">Kate Baumli</a>, <a href="/search/cs?searchtype=author&amp;query=Iqbal%2C+S">Shariq Iqbal</a>, <a href="/search/cs?searchtype=author&amp;query=Bishop%2C+C">Colton Bishop</a>, <a href="/search/cs?searchtype=author&amp;query=Roelofs%2C+R">Rebecca Roelofs</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhang%2C+L+M">Lei M Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=McKinney%2C+K">Kay McKinney</a>, <a href="/search/cs?searchtype=author&amp;query=Shrivastava%2C+D">Disha Shrivastava</a>, <a href="/search/cs?searchtype=author&amp;query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&amp;query=Tucker%2C+G">George Tucker</a>, <a href="/search/cs?searchtype=author&amp;query=Precup%2C+D">Doina Precup</a>, <a href="/search/cs?searchtype=author&amp;query=Behbahani%2C+F">Feryal Behbahani</a>, <a href="/search/cs?searchtype=author&amp;query=Faust%2C+A">Aleksandra Faust</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12917v2-abstract-short" style="display: inline;"> Self-correction is a highly desirable capability of large language models (LLMs), yet it has consistently been found to be largely ineffective in modern LLMs. Current methods for training self-correction typically depend on either multiple models, a more advanced model, or additional forms of supervision. To address these shortcomings, we develop a multi-turn online reinforcement learning (RL) app&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12917v2-abstract-full').style.display = 'inline'; document.getElementById('2409.12917v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12917v2-abstract-full" style="display: none;"> Self-correction is a highly desirable capability of large language models (LLMs), yet it has consistently been found to be largely ineffective in modern LLMs. Current methods for training self-correction typically depend on either multiple models, a more advanced model, or additional forms of supervision. 
To address these shortcomings, we develop a multi-turn online reinforcement learning (RL) approach, SCoRe, that significantly improves an LLM&#39;s self-correction ability using entirely self-generated data. To build SCoRe, we first show that variants of supervised fine-tuning (SFT) on offline model-generated correction traces are often insufficient for instilling self-correction behavior. In particular, we observe that training via SFT falls prey to either a distribution mismatch between mistakes made by the data-collection policy and the model&#39;s own responses, or to behavior collapse, where learning implicitly prefers only a certain mode of correction behavior that is often not effective at self-correction on test problems. SCoRe addresses these challenges by training under the model&#39;s own distribution of self-generated correction traces and using appropriate regularization to steer the learning process into learning a self-correction behavior that is effective at test time as opposed to fitting high-reward responses for a given prompt. This regularization process includes an initial phase of multi-turn RL on a base model to generate a policy initialization that is less susceptible to collapse, followed by using a reward bonus to amplify self-correction. With Gemini 1.0 Pro and 1.5 Flash models, we find that SCoRe achieves state-of-the-art self-correction performance, improving the base models&#39; self-correction by 15.6% and 9.1% respectively on MATH and HumanEval. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12917v2-abstract-full').style.display = 'none'; document.getElementById('2409.12917v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12616">arXiv:2409.12616</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.12616">pdf</a>, <a href="https://arxiv.org/format/2409.12616">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Semi-Supervised Safe Visuomotor Policy Synthesis using Barrier Certificates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tayal%2C+M">Manan Tayal</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Aditya Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Jagtap%2C+P">Pushpak Jagtap</a>, <a href="/search/cs?searchtype=author&amp;query=Kolathaya%2C+S">Shishir Kolathaya</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12616v1-abstract-short" style="display: inline;"> In modern robotics, addressing the lack of accurate state space information in real-world scenarios has led to a significant focus on utilizing visuomotor 
observation to provide safety assurances. Although supervised learning methods, such as imitation learning, have demonstrated potential in synthesizing control policies based on visuomotor observations, they require ground truth safety labels fo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12616v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12616v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12616v1-abstract-full" style="display: none;"> In modern robotics, addressing the lack of accurate state space information in real-world scenarios has led to a significant focus on utilizing visuomotor observation to provide safety assurances. Although supervised learning methods, such as imitation learning, have demonstrated potential in synthesizing control policies based on visuomotor observations, they require ground truth safety labels for the complete dataset and do not provide formal safety assurances. On the other hand, traditional control-theoretic methods like Control Barrier Functions (CBFs) and Hamilton-Jacobi (HJ) Reachability provide formal safety guarantees but depend on accurate knowledge of system dynamics, which is often unavailable for high-dimensional visuomotor data. To overcome these limitations, we propose a novel approach to synthesize a semi-supervised safe visuomotor policy using barrier certificates that integrate the strengths of model-free supervised learning and model-based control methods. This framework synthesizes a provably safe controller without requiring safety labels for the complete dataset and ensures completeness guarantees for both the barrier certificate and the policy. We validate our approach through distinct case studies: an inverted pendulum system and the obstacle avoidance of an autonomous mobile robot. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12616v1-abstract-full').style.display = 'none'; document.getElementById('2409.12616v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">First two authors have contributed equally. 8 Pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11847">arXiv:2409.11847</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11847">pdf</a>, <a href="https://arxiv.org/ps/2409.11847">ps</a>, <a href="https://arxiv.org/format/2409.11847">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An efficient wavelet-based physics-informed neural networks for singularly perturbed problems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pandey%2C+H">Himanshu Pandey</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Anshima Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Behera%2C+R">Ratikanta Behera</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11847v1-abstract-short" style="display: inline;"> Physics-informed neural networks (PINNs) are a class of deep learning models that utilize physics as differential equations to address complex 
problems, including ones that may involve limited data availability. However, tackling solutions of differential equations with oscillations or singular perturbations and shock-like structures becomes challenging for PINNs. Considering these challenges, we&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11847v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11847v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11847v1-abstract-full" style="display: none;"> Physics-informed neural networks (PINNs) are a class of deep learning models that utilize physics as differential equations to address complex problems, including ones that may involve limited data availability. However, tackling solutions of differential equations with oscillations or singular perturbations and shock-like structures becomes challenging for PINNs. Considering these challenges, we designed an efficient wavelet-based PINNs (W-PINNs) model to solve singularly perturbed differential equations. Here, we represent the solution in wavelet space using a family of smooth-compactly supported wavelets. This framework represents the solution of a differential equation with significantly fewer degrees of freedom while still retaining in capturing, identifying, and analyzing the local structure of complex physical phenomena. The architecture allows the training process to search for a solution within wavelet space, making the process faster and more accurate. The proposed model does not rely on automatic differentiations for derivatives involved in differential equations and does not require any prior information regarding the behavior of the solution, such as the location of abrupt features. 
Thus, through a strategic fusion of wavelets with PINNs, W-PINNs excel at capturing localized nonlinear information, making them well-suited for problems showing abrupt behavior in certain regions, such as singularly perturbed problems. The efficiency and accuracy of the proposed neural network model are demonstrated in various test problems, i.e., highly singularly perturbed nonlinear differential equations, the FitzHugh-Nagumo (FHN), and Predator-prey interaction models. The proposed design model exhibits impressive comparisons with traditional PINNs and the recently developed wavelet-based PINNs, which use wavelets as an activation function for solving nonlinear differential equations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11847v1-abstract-full').style.display = 'none'; document.getElementById('2409.11847v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11262">arXiv:2409.11262</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.11262">pdf</a>, <a href="https://arxiv.org/format/2409.11262">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> The Sounds of Home: A Speech-Removed Residential Audio Dataset for Sound Event Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bibb%C3%B3%2C+G">Gabriel Bibbó</a>, <a href="/search/cs?searchtype=author&amp;query=Deacon%2C+T">Thomas Deacon</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Arshdeep Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Plumbley%2C+M+D">Mark D. Plumbley</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11262v2-abstract-short" style="display: inline;"> This paper presents a residential audio dataset to support sound event detection research for smart home applications aimed at promoting wellbeing for older adults. The dataset is constructed by deploying audio recording systems in the homes of 8 participants aged 55-80 years for a 7-day period. 
Acoustic characteristics are documented through detailed floor plans and construction material informat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11262v2-abstract-full').style.display = 'inline'; document.getElementById('2409.11262v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11262v2-abstract-full" style="display: none;"> This paper presents a residential audio dataset to support sound event detection research for smart home applications aimed at promoting wellbeing for older adults. The dataset is constructed by deploying audio recording systems in the homes of 8 participants aged 55-80 years for a 7-day period. Acoustic characteristics are documented through detailed floor plans and construction material information to enable replication of the recording environments for AI model deployment. A novel automated speech removal pipeline is developed, using pre-trained audio neural networks to detect and remove segments containing spoken voice, while preserving segments containing other sound events. The resulting dataset consists of privacy-compliant audio recordings that accurately capture the soundscapes and activities of daily living within residential spaces. The paper details the dataset creation methodology, the speech removal pipeline utilizing cascaded model architectures, and an analysis of the vocal label distribution to validate the speech removal process. This dataset enables the development and benchmarking of sound event detection models tailored specifically for in-home applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11262v2-abstract-full').style.display = 'none'; document.getElementById('2409.11262v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10979">arXiv:2409.10979</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.10979">pdf</a>, <a href="https://arxiv.org/ps/2409.10979">ps</a>, <a href="https://arxiv.org/format/2409.10979">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> A Symbol-Pair Decoder for CSS Codes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jha%2C+V+P">Vatsal Pramod Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Parampalli%2C+U">Udaya Parampalli</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+K">Abhay Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10979v1-abstract-short" style="display: inline;"> The relation between stabilizer codes and binary codes provided by Gottesman and Calderbank et al. is a celebrated result, as it allows the lifting of classical codes to quantum codes. 
An equivalent way to state this result is that the work allows us to lift decoders for classical codes over the Hamming metric to decoders for stabilizer quantum codes. A natural question to consider: Can we do some&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10979v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10979v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10979v1-abstract-full" style="display: none;"> The relation between stabilizer codes and binary codes provided by Gottesman and Calderbank et al. is a celebrated result, as it allows the lifting of classical codes to quantum codes. An equivalent way to state this result is that the work allows us to lift decoders for classical codes over the Hamming metric to decoders for stabilizer quantum codes. A natural question to consider: Can we do something similar with decoders for classical codes considered over other metrics? i.e., Can we lift decoders for classical codes over other metrics to obtain decoders for stabilizer quantum codes? In our current work, we answer this question in the affirmative by considering classical codes over the symbol-pair metric. In particular, we present a relation between the symplectic weight and the symbol-pair weight and use it to improve the error correction capability of CSS-codes (a well-studied class of stabilizer codes) obtained from cyclic codes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10979v1-abstract-full').style.display = 'none'; document.getElementById('2409.10979v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08384">arXiv:2409.08384</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.08384">pdf</a>, <a href="https://arxiv.org/ps/2409.08384">ps</a>, <a href="https://arxiv.org/format/2409.08384">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Noisy Low Rank Column-wise Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A+P">Ankit Pratap Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Vaswani%2C+N">Namrata Vaswani</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08384v1-abstract-short" style="display: inline;"> This letter studies the AltGDmin algorithm for solving the noisy low rank column-wise sensing (LRCS) problem. Our sample complexity guarantee improves upon the best existing one by a factor $\max(r, \log(1/\epsilon))/r$ where $r$ is the rank of the unknown matrix and $\epsilon$ is the final desired accuracy. 
A second contribution of this work is a detailed comparison of guarantees from all work that studies the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08384v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08384v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08384v1-abstract-full" style="display: none;"> This letter studies the AltGDmin algorithm for solving the noisy low rank column-wise sensing (LRCS) problem. Our sample complexity guarantee improves upon the best existing one by a factor $\max(r, \log(1/\epsilon))/r$ where $r$ is the rank of the unknown matrix and $\epsilon$ is the final desired accuracy. A second contribution of this work is a detailed comparison of guarantees from all work that studies the exact same mathematical problem as LRCS, but refers to it by different names. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08384v1-abstract-full').style.display = 'none'; document.getElementById('2409.08384v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02060">arXiv:2409.02060</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.02060">pdf</a>, <a href="https://arxiv.org/format/2409.02060">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OLMoE: Open Mixture-of-Experts Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&amp;query=Soldaini%2C+L">Luca Soldaini</a>, <a href="/search/cs?searchtype=author&amp;query=Groeneveld%2C+D">Dirk Groeneveld</a>, <a href="/search/cs?searchtype=author&amp;query=Lo%2C+K">Kyle Lo</a>, <a href="/search/cs?searchtype=author&amp;query=Morrison%2C+J">Jacob Morrison</a>, <a href="/search/cs?searchtype=author&amp;query=Min%2C+S">Sewon Min</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Weijia Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Walsh%2C+P">Pete Walsh</a>, <a href="/search/cs?searchtype=author&amp;query=Tafjord%2C+O">Oyvind Tafjord</a>, <a href="/search/cs?searchtype=author&amp;query=Lambert%2C+N">Nathan Lambert</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+Y">Yuling Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Arora%2C+S">Shane Arora</a>, <a 
href="/search/cs?searchtype=author&amp;query=Bhagia%2C+A">Akshita Bhagia</a>, <a href="/search/cs?searchtype=author&amp;query=Schwenk%2C+D">Dustin Schwenk</a>, <a href="/search/cs?searchtype=author&amp;query=Wadden%2C+D">David Wadden</a>, <a href="/search/cs?searchtype=author&amp;query=Wettig%2C+A">Alexander Wettig</a>, <a href="/search/cs?searchtype=author&amp;query=Hui%2C+B">Binyuan Hui</a>, <a href="/search/cs?searchtype=author&amp;query=Dettmers%2C+T">Tim Dettmers</a>, <a href="/search/cs?searchtype=author&amp;query=Kiela%2C+D">Douwe Kiela</a>, <a href="/search/cs?searchtype=author&amp;query=Farhadi%2C+A">Ali Farhadi</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+N+A">Noah A. Smith</a>, <a href="/search/cs?searchtype=author&amp;query=Koh%2C+P+W">Pang Wei Koh</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+A">Amanpreet Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02060v1-abstract-short" style="display: inline;"> We introduce OLMoE, a fully open, state-of-the-art language model leveraging sparse Mixture-of-Experts (MoE). OLMoE-1B-7B has 7 billion (B) parameters but uses only 1B per input token. We pretrain it on 5 trillion tokens and further adapt it to create OLMoE-1B-7B-Instruct. 
Our models outperform all available models with similar active parameters, even surpassing larger ones like Llama2-13B-Chat an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02060v1-abstract-full').style.display = 'inline'; document.getElementById('2409.02060v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02060v1-abstract-full" style="display: none;"> We introduce OLMoE, a fully open, state-of-the-art language model leveraging sparse Mixture-of-Experts (MoE). OLMoE-1B-7B has 7 billion (B) parameters but uses only 1B per input token. We pretrain it on 5 trillion tokens and further adapt it to create OLMoE-1B-7B-Instruct. Our models outperform all available models with similar active parameters, even surpassing larger ones like Llama2-13B-Chat and DeepSeekMoE-16B. We present various experiments on MoE training, analyze routing in our model showing high specialization, and open-source all aspects of our work: model weights, training data, code, and logs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02060v1-abstract-full').style.display = 'none'; document.getElementById('2409.02060v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">61 pages (24 main), 36 figures, 14 tables</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Singh%2C+A&amp;start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 
9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 
0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10