Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 226 results for author: <span class="mathjax">Singh, A K</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Singh%2C+A+K">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Singh, A K"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Singh%2C+A+K&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Singh, A K"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03923">arXiv:2411.03923</a> <span> [<a href="https://arxiv.org/pdf/2411.03923">pdf</a>, <a href="https://arxiv.org/format/2411.03923">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluation data contamination in LLMs: how do we measure it and (when) does it matter? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Kocyigit%2C+M+Y">Muhammed Yusuf Kocyigit</a>, <a href="/search/cs?searchtype=author&query=Poulton%2C+A">Andrew Poulton</a>, <a href="/search/cs?searchtype=author&query=Esiobu%2C+D">David Esiobu</a>, <a href="/search/cs?searchtype=author&query=Lomeli%2C+M">Maria Lomeli</a>, <a href="/search/cs?searchtype=author&query=Szilvasy%2C+G">Gergely Szilvasy</a>, <a href="/search/cs?searchtype=author&query=Hupkes%2C+D">Dieuwke Hupkes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03923v1-abstract-short" style="display: inline;"> Hampering the interpretation of benchmark scores, evaluation data contamination has become a growing concern in the evaluation of LLMs, and an active area of research studies its effects. While evaluation data contamination is easily understood intuitively, it is surprisingly difficult to define precisely which samples should be considered contaminated and, consequently, how it impacts benchmark s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03923v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03923v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03923v1-abstract-full" style="display: none;"> Hampering the interpretation of benchmark scores, evaluation data contamination has become a growing concern in the evaluation of LLMs, and an active area of research studies its effects. While evaluation data contamination is easily understood intuitively, it is surprisingly difficult to define precisely which samples should be considered contaminated and, consequently, how it impacts benchmark scores. We propose that these questions should be addressed together and that contamination metrics can be assessed based on whether models benefit from the examples they mark contaminated. We propose a novel analysis method called ConTAM, and show with a large scale survey of existing and novel n-gram based contamination metrics across 13 benchmarks and 7 models from 2 different families that ConTAM can be used to better understand evaluation data contamination and its effects. We find that contamination may have a much larger effect than reported in recent LLM releases and benefits models differently at different scales. We also find that considering only the longest contaminated substring provides a better signal than considering a union of all contaminated substrings, and that doing model and benchmark specific threshold analysis greatly increases the specificity of the results. Lastly, we investigate the impact of hyperparameter choices, finding that, among other things, both using larger values of n and disregarding matches that are infrequent in the pre-training data lead to many false negatives. With ConTAM, we provide a method to empirically ground evaluation data contamination metrics in downstream effects. With our exploration, we shed light on how evaluation data contamination can impact LLMs and provide insight into the considerations important when doing contamination analysis. We end our paper by discussing these in more detail and providing concrete suggestions for future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03923v1-abstract-full').style.display = 'none'; document.getElementById('2411.03923v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19712">arXiv:2410.19712</a> <span> [<a href="https://arxiv.org/pdf/2410.19712">pdf</a>, <a href="https://arxiv.org/format/2410.19712">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> DA-VIL: Adaptive Dual-Arm Manipulation with Reinforcement Learning and Variable Impedance Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Karim%2C+M+F">Md Faizal Karim</a>, <a href="/search/cs?searchtype=author&query=Bollimuntha%2C+S">Shreya Bollimuntha</a>, <a href="/search/cs?searchtype=author&query=Hashmi%2C+M+S">Mohammed Saad Hashmi</a>, <a href="/search/cs?searchtype=author&query=Das%2C+A">Autrio Das</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+G">Gaurav Singh</a>, <a href="/search/cs?searchtype=author&query=Sridhar%2C+S">Srinath Sridhar</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Govindan%2C+N">Nagamanikandan Govindan</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+K+M">K Madhava Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19712v1-abstract-short" style="display: inline;"> Dual-arm manipulation is an area of growing interest in the robotics community. Enabling robots to perform tasks that require the coordinated use of two arms, is essential for complex manipulation tasks such as handling large objects, assembling components, and performing human-like interactions. However, achieving effective dual-arm manipulation is challenging due to the need for precise coordina… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19712v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19712v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19712v1-abstract-full" style="display: none;"> Dual-arm manipulation is an area of growing interest in the robotics community. Enabling robots to perform tasks that require the coordinated use of two arms, is essential for complex manipulation tasks such as handling large objects, assembling components, and performing human-like interactions. However, achieving effective dual-arm manipulation is challenging due to the need for precise coordination, dynamic adaptability, and the ability to manage interaction forces between the arms and the objects being manipulated. We propose a novel pipeline that combines the advantages of policy learning based on environment feedback and gradient-based optimization to learn controller gains required for the control outputs. This allows the robotic system to dynamically modulate its impedance in response to task demands, ensuring stability and dexterity in dual-arm operations. We evaluate our pipeline on a trajectory-tracking task involving a variety of large, complex objects with different masses and geometries. The performance is then compared to three other established methods for controlling dual-arm robots, demonstrating superior results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19712v1-abstract-full').style.display = 'none'; document.getElementById('2410.19712v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18751">arXiv:2410.18751</a> <span> [<a href="https://arxiv.org/pdf/2410.18751">pdf</a>, <a href="https://arxiv.org/ps/2410.18751">ps</a>, <a href="https://arxiv.org/format/2410.18751">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Trading and Market Microstructure">q-fin.TR</span> </div> </div> <p class="title is-5 mathjax"> Double Auctions: Formalization and Automated Checkers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Garg%2C+M">Mohit Garg</a>, <a href="/search/cs?searchtype=author&query=Raja%2C+N">N. Raja</a>, <a href="/search/cs?searchtype=author&query=Sarswat%2C+S">Suneel Sarswat</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kr Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18751v1-abstract-short" style="display: inline;"> Double auctions are widely used in financial markets, such as those for stocks, derivatives, currencies, and commodities, to match demand and supply. Once all buyers and sellers have placed their trade requests, the exchange determines how these requests are to be matched. The two most common objectives for determining the matching are maximizing trade volume at a uniform price and maximizing trad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18751v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18751v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18751v1-abstract-full" style="display: none;"> Double auctions are widely used in financial markets, such as those for stocks, derivatives, currencies, and commodities, to match demand and supply. Once all buyers and sellers have placed their trade requests, the exchange determines how these requests are to be matched. The two most common objectives for determining the matching are maximizing trade volume at a uniform price and maximizing trade volume through dynamic pricing. Prior research has primarily focused on single-quantity trade requests. In this work, we extend the framework to handle multiple-quantity trade requests and present fully formalized matching algorithms for double auctions, along with their correctness proofs. We establish new uniqueness theorems, enabling automatic detection of violations in exchange systems by comparing their output to that of a verified program. All proofs are formalized in the Coq Proof Assistant, and we extract verified OCaml and Haskell programs that could serve as a resource for exchanges and market regulators. We demonstrate the practical applicability of our work by running the verified program on real market data from an exchange to automatically check for violations in the exchange algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18751v1-abstract-full').style.display = 'none'; document.getElementById('2410.18751v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, Preliminary version of this work was published in ITP 2021</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.3.1; K.4.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18494">arXiv:2410.18494</a> <span> [<a href="https://arxiv.org/pdf/2410.18494">pdf</a>, <a href="https://arxiv.org/format/2410.18494">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Assured Automatic Programming via Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mirchev%2C+M">Martin Mirchev</a>, <a href="/search/cs?searchtype=author&query=Costea%2C+A">Andreea Costea</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kr Singh</a>, <a href="/search/cs?searchtype=author&query=Roychoudhury%2C+A">Abhik Roychoudhury</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18494v2-abstract-short" style="display: inline;"> With the advent of AI-based coding engines, it is possible to convert natural language requirements to executable code in standard programming languages. However, AI-generated code can be unreliable, and the natural language requirements driving this code may be ambiguous. In other words, the intent may not be accurately captured in the code generated from AI-coding engines like Copilot. The goal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18494v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18494v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18494v2-abstract-full" style="display: none;"> With the advent of AI-based coding engines, it is possible to convert natural language requirements to executable code in standard programming languages. However, AI-generated code can be unreliable, and the natural language requirements driving this code may be ambiguous. In other words, the intent may not be accurately captured in the code generated from AI-coding engines like Copilot. The goal of our work is to discover the programmer intent, while generating code which conforms to the intent and a proof of this conformance. Our approach to intent discovery is powered by a novel repair engine called program-proof co-evolution, where the object of repair is a tuple (code, logical specification, test) generated by an LLM from the same natural language description. The program and the specification capture the initial operational and declarative description of intent, while the test represents a concrete, albeit partial, understanding of the intent. Our objective is to achieve consistency between the program, the specification, and the test by incrementally refining our understanding of the user intent. Reaching consistency through this repair process provides us with a formal, logical description of the intent, which is then translated back into natural language for the developer's inspection. The resultant intent description is now unambiguous, though expressed in natural language. We demonstrate how the unambiguous intent discovered through our approach increases the percentage of verifiable auto-generated programs on a recently proposed dataset in the Dafny programming language. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18494v2-abstract-full').style.display = 'none'; document.getElementById('2410.18494v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09339">arXiv:2410.09339</a> <span> [<a href="https://arxiv.org/pdf/2410.09339">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Advanced Gesture Recognition in Autism: Integrating YOLOv7, Video Augmentation and VideoMAE for Video Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Amit Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Shrivastava%2C+T">Trapti Shrivastava</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+V">Vrijendra Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09339v1-abstract-short" style="display: inline;"> Deep learning and advancements in contactless sensors have significantly enhanced our ability to understand complex human activities in healthcare settings. In particular, deep learning models utilizing computer vision have been developed to enable detailed analysis of human gesture recognition, especially repetitive gestures which are commonly observed behaviors in children with autism. This rese… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09339v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09339v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09339v1-abstract-full" style="display: none;"> Deep learning and advancements in contactless sensors have significantly enhanced our ability to understand complex human activities in healthcare settings. In particular, deep learning models utilizing computer vision have been developed to enable detailed analysis of human gesture recognition, especially repetitive gestures which are commonly observed behaviors in children with autism. This research work aims to identify repetitive behaviors indicative of autism by analyzing videos captured in natural settings as children engage in daily activities. The focus is on accurately categorizing real-time repetitive gestures such as spinning, head banging, and arm flapping. To this end, we utilize the publicly accessible Self-Stimulatory Behavior Dataset (SSBD) to classify these stereotypical movements. A key component of the proposed methodology is the use of \textbf{VideoMAE}, a model designed to improve both spatial and temporal analysis of video data through a masking and reconstruction mechanism. This model significantly outperformed traditional methods, achieving an accuracy of 97.7\%, a 14.7\% improvement over the previous state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09339v1-abstract-full').style.display = 'none'; document.getElementById('2410.09339v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03621">arXiv:2410.03621</a> <span> [<a href="https://arxiv.org/pdf/2410.03621">pdf</a>, <a href="https://arxiv.org/format/2410.03621">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCE.2024.3373912">10.1109/TCE.2024.3373912 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Global Medical Data Security and Privacy Preserving Standards Identification Framework for Electronic Healthcare Consumers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mishra%2C+V">Vinaytosh Mishra</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+K">Kishu Gupta</a>, <a href="/search/cs?searchtype=author&query=Saxena%2C+D">Deepika Saxena</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ashutosh Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03621v1-abstract-short" style="display: inline;"> Electronic Health Records (EHR) are crucial for the success of digital healthcare, with a focus on putting consumers at the center of this transformation. However, the digitalization of healthcare records brings along security and privacy risks for personal data. The major concern is that different countries have varying standards for the security and privacy of medical data. This paper proposed a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03621v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03621v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03621v1-abstract-full" style="display: none;"> Electronic Health Records (EHR) are crucial for the success of digital healthcare, with a focus on putting consumers at the center of this transformation. However, the digitalization of healthcare records brings along security and privacy risks for personal data. The major concern is that different countries have varying standards for the security and privacy of medical data. This paper proposed a novel and comprehensive framework to standardize these rules globally, bringing them together on a common platform. To support this proposal, the study reviews existing literature to understand the research interest in this issue. It also examines six key laws and standards related to security and privacy, identifying twenty concepts. The proposed framework utilized K-means clustering to categorize these concepts and identify five key factors. Finally, an Ordinal Priority Approach is applied to determine the preferred implementation of these factors in the context of EHRs. The proposed study provides a descriptive then prescriptive framework for the implementation of privacy and security in the context of electronic health records. Therefore, the findings of the proposed framework are useful for professionals and policymakers in improving the security and privacy associated with EHRs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03621v1-abstract-full').style.display = 'none'; document.getElementById('2410.03621v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> A Global Medical Data Security and Privacy Preserving Standards Identification Framework for Electronic Healthcare Consumers, in IEEE Transactions on Consumer Electronics, vol. 70, no. 1, pp. 4379-4387, Feb. 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03217">arXiv:2410.03217</a> <span> [<a href="https://arxiv.org/pdf/2410.03217">pdf</a>, <a href="https://arxiv.org/format/2410.03217">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TASE.2024.3456209">10.1109/TASE.2024.3456209 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An Intelligent Quantum Cyber-Security Framework for Healthcare Data Management </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+K">Kishu Gupta</a>, <a href="/search/cs?searchtype=author&query=Saxena%2C+D">Deepika Saxena</a>, <a href="/search/cs?searchtype=author&query=Rani%2C+P">Pooja Rani</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+J">Jitendra Kumar</a>, <a href="/search/cs?searchtype=author&query=Makkar%2C+A">Aaisha Makkar</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ashutosh Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+C">Chung-Nan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03217v1-abstract-short" style="display: inline;"> Digital healthcare is essential to facilitate consumers to access and disseminate their medical data easily for enhanced medical care services. However, the significant concern with digitalization across healthcare systems necessitates for a prompt, productive, and secure storage facility along with a vigorous communication strategy, to stimulate sensitive digital healthcare data sharing and proac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03217v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03217v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03217v1-abstract-full" style="display: none;"> Digital healthcare is essential to facilitate consumers to access and disseminate their medical data easily for enhanced medical care services. However, the significant concern with digitalization across healthcare systems necessitates for a prompt, productive, and secure storage facility along with a vigorous communication strategy, to stimulate sensitive digital healthcare data sharing and proactive estimation of malicious entities. In this context, this paper introduces a comprehensive quantum-based framework to overwhelm the potential security and privacy issues for secure healthcare data management. It equips quantum encryption for the secured storage and dispersal of healthcare data over the shared cloud platform by employing quantum encryption. Also, the framework furnishes a quantum feed-forward neural network unit to examine the intention behind the data request before granting access, for proactive estimation of potential data breach. In this way, the proposed framework delivers overall healthcare data management by coupling the advanced and more competent quantum approach with machine learning to safeguard the data storage, access, and prediction of malicious entities in an automated manner. Thus, the proposed IQ-HDM leads to more cooperative and effective healthcare delivery and empowers individuals with adequate custody of their health data. The experimental evaluation and comparison of the proposed IQ-HDM framework with state-of-the-art methods outline a considerable improvement up to 67.6%, in tackling cyber threats related to healthcare data security. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03217v1-abstract-full').style.display = 'none'; document.getElementById('2410.03217v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Automation Science and Engineering (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16011">arXiv:2409.16011</a> <span> [<a href="https://arxiv.org/pdf/2409.16011">pdf</a>, <a href="https://arxiv.org/format/2409.16011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> CrowdSurfer: Sampling Optimization Augmented with Vector-Quantized Variational AutoEncoder for Dense Crowd Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kumar%2C+N">Naman Kumar</a>, <a href="/search/cs?searchtype=author&query=Singha%2C+A">Antareep Singha</a>, <a href="/search/cs?searchtype=author&query=Nanwani%2C+L">Laksh Nanwani</a>, <a href="/search/cs?searchtype=author&query=Potdar%2C+D">Dhruv Potdar</a>, <a href="/search/cs?searchtype=author&query=R%2C+T">Tarun R</a>, <a href="/search/cs?searchtype=author&query=Rastgar%2C+F">Fatemeh Rastgar</a>, <a href="/search/cs?searchtype=author&query=Idoko%2C+S">Simon Idoko</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+K+M">K. Madhava Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16011v1-abstract-short" style="display: inline;"> Navigation amongst densely packed crowds remains a challenge for mobile robots. The complexity increases further if the environment layout changes, making the prior computed global plan infeasible. In this paper, we show that it is possible to dramatically enhance crowd navigation by just improving the local planner. Our approach combines generative modelling with inference time optimization to ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16011v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16011v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16011v1-abstract-full" style="display: none;"> Navigation amongst densely packed crowds remains a challenge for mobile robots. The complexity increases further if the environment layout changes, making the prior computed global plan infeasible. In this paper, we show that it is possible to dramatically enhance crowd navigation by just improving the local planner. Our approach combines generative modelling with inference time optimization to generate sophisticated long-horizon local plans at interactive rates. More specifically, we train a Vector Quantized Variational AutoEncoder to learn a prior over the expert trajectory distribution conditioned on the perception input. At run-time, this is used as an initialization for a sampling-based optimizer for further refinement. Our approach does not require any sophisticated prediction of dynamic obstacles and yet provides state-of-the-art performance. In particular, we compare against the recent DRL-VO approach and show a 40% improvement in success rate and a 6% improvement in travel time. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16011v1-abstract-full').style.display = 'none'; document.getElementById('2409.16011v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10979">arXiv:2409.10979</a> <span> [<a href="https://arxiv.org/pdf/2409.10979">pdf</a>, <a href="https://arxiv.org/ps/2409.10979">ps</a>, <a href="https://arxiv.org/format/2409.10979">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> A Symbol-Pair Decoder for CSS Codes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jha%2C+V+P">Vatsal Pramod Jha</a>, <a href="/search/cs?searchtype=author&query=Parampalli%2C+U">Udaya Parampalli</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhay Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10979v1-abstract-short" style="display: inline;"> The relation between stabilizer codes and binary codes provided by Gottesman and Calderbank et al. is a celebrated result, as it allows the lifting of classical codes to quantum codes. An equivalent way to state this result is that the work allows us to lift decoders for classical codes over the Hamming metric to decoders for stabilizer quantum codes. A natural question to consider: Can we do some… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10979v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10979v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10979v1-abstract-full" style="display: none;"> The relation between stabilizer codes and binary codes provided by Gottesman and Calderbank et al. is a celebrated result, as it allows the lifting of classical codes to quantum codes. An equivalent way to state this result is that the work allows us to lift decoders for classical codes over the Hamming metric to decoders for stabilizer quantum codes. A natural question to consider: Can we do something similar with decoders for classical codes considered over other metrics? i.e., Can we lift decoders for classical codes over other metrics to obtain decoders for stabilizer quantum codes? In our current work, we answer this question in the affirmative by considering classical codes over the symbol-pair metric. In particular, we present a relation between the symplectic weight and the symbol-pair weight and use it to improve the error correction capability of CSS-codes (a well-studied class of stabilizer codes) obtained from cyclic codes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10979v1-abstract-full').style.display = 'none'; document.getElementById('2409.10979v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00735">arXiv:2409.00735</a> <span> [<a href="https://arxiv.org/pdf/2409.00735">pdf</a>, <a href="https://arxiv.org/format/2409.00735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AgGym: An agricultural biotic stress simulation environment for ultra-precision management planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khosravi%2C+M">Mahsa Khosravi</a>, <a href="/search/cs?searchtype=author&query=Carroll%2C+M">Matthew Carroll</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+K+L">Kai Liang Tan</a>, <a href="/search/cs?searchtype=author&query=Van+der+Laan%2C+L">Liza Van der Laan</a>, <a href="/search/cs?searchtype=author&query=Raigne%2C+J">Joscif Raigne</a>, <a href="/search/cs?searchtype=author&query=Mueller%2C+D+S">Daren S. Mueller</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Arti Singh</a>, <a href="/search/cs?searchtype=author&query=Balu%2C+A">Aditya Balu</a>, <a href="/search/cs?searchtype=author&query=Ganapathysubramanian%2C+B">Baskar Ganapathysubramanian</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Asheesh Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Soumik Sarkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00735v1-abstract-short" style="display: inline;"> Agricultural production requires careful management of inputs such as fungicides, insecticides, and herbicides to ensure a successful crop that is high-yielding, profitable, and of superior seed quality. Current state-of-the-art field crop management relies on coarse-scale crop management strategies, where entire fields are sprayed with pest and disease-controlling chemicals, leading to increased… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00735v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00735v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00735v1-abstract-full" style="display: none;"> Agricultural production requires careful management of inputs such as fungicides, insecticides, and herbicides to ensure a successful crop that is high-yielding, profitable, and of superior seed quality. Current state-of-the-art field crop management relies on coarse-scale crop management strategies, where entire fields are sprayed with pest and disease-controlling chemicals, leading to increased cost and sub-optimal soil and crop management. To overcome these challenges and optimize crop production, we utilize machine learning tools within a virtual field environment to generate localized management plans for farmers to manage biotic threats while maximizing profits. Specifically, we present AgGym, a modular, crop and stress agnostic simulation framework to model the spread of biotic stresses in a field and estimate yield losses with and without chemical treatments. Our validation with real data shows that AgGym can be customized with limited data to simulate yield outcomes under various biotic stress conditions. We further demonstrate that deep reinforcement learning (RL) policies can be trained using AgGym for designing ultra-precise biotic stress mitigation strategies with potential to increase yield recovery with less chemicals and lower cost. Our proposed framework enables personalized decision support that can transform biotic stress management from being schedule based and reactive to opportunistic and prescriptive. We also release the AgGym software implementation as a community resource and invite experts to contribute to this open-sourced publicly available modular environment framework. The source code can be accessed at: https://github.com/SCSLabISU/AgGym. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00735v1-abstract-full').style.display = 'none'; document.getElementById('2409.00735v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01444">arXiv:2408.01444</a> <span> [<a href="https://arxiv.org/pdf/2408.01444">pdf</a>, <a href="https://arxiv.org/format/2408.01444">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> No Size Fits All: The Perils and Pitfalls of Leveraging LLMs Vary with Company Size </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Urlana%2C+A">Ashok Urlana</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+C+V">Charaka Vinayak Kumar</a>, <a href="/search/cs?searchtype=author&query=Garlapati%2C+B+M">Bala Mallikarjunarao Garlapati</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ajeet Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+R">Rahul Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.01444v1-abstract-short" style="display: inline;"> Large language models (LLMs) are playing a pivotal role in deploying strategic use cases across a range of organizations, from large pan-continental companies to emerging startups. The issues and challenges involved in the successful utilization of LLMs can vary significantly depending on the size of the organization. It is important to study and discuss these pertinent issues of LLM adaptation wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01444v1-abstract-full').style.display = 'inline'; document.getElementById('2408.01444v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.01444v1-abstract-full" style="display: none;"> Large language models (LLMs) are playing a pivotal role in deploying strategic use cases across a range of organizations, from large pan-continental companies to emerging startups. The issues and challenges involved in the successful utilization of LLMs can vary significantly depending on the size of the organization. It is important to study and discuss these pertinent issues of LLM adaptation with a focus on the scale of the industrial concerns and brainstorm possible solutions and prospective directions. Such a study has not been prominently featured in the current research literature. In this study, we adopt a threefold strategy: first, we conduct a case study with industry practitioners to formulate the key research questions; second, we examine existing industrial publications to address these questions; and finally, we provide a practical guide for industries to utilize LLMs more efficiently. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01444v1-abstract-full').style.display = 'none'; document.getElementById('2408.01444v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19617">arXiv:2407.19617</a> <span> [<a href="https://arxiv.org/pdf/2407.19617">pdf</a>, <a href="https://arxiv.org/format/2407.19617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AgEval: A Benchmark for Zero-Shot and Few-Shot Plant Stress Phenotyping with Multimodal LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arshad%2C+M+A">Muhammad Arbab Arshad</a>, <a href="/search/cs?searchtype=author&query=Jubery%2C+T+Z">Talukder Zaki Jubery</a>, <a href="/search/cs?searchtype=author&query=Roy%2C+T">Tirtho Roy</a>, <a href="/search/cs?searchtype=author&query=Nassiri%2C+R">Rim Nassiri</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Asheesh K. Singh</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Arti Singh</a>, <a href="/search/cs?searchtype=author&query=Hegde%2C+C">Chinmay Hegde</a>, <a href="/search/cs?searchtype=author&query=Ganapathysubramanian%2C+B">Baskar Ganapathysubramanian</a>, <a href="/search/cs?searchtype=author&query=Balu%2C+A">Aditya Balu</a>, <a href="/search/cs?searchtype=author&query=Krishnamurthy%2C+A">Adarsh Krishnamurthy</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Soumik Sarkar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19617v1-abstract-short" style="display: inline;"> Plant stress phenotyping traditionally relies on expert assessments and specialized models, limiting scalability in agriculture. Recent advances in multimodal large language models (LLMs) offer potential solutions to this challenge. We present AgEval, a benchmark comprising 12 diverse plant stress phenotyping tasks, to evaluate these models' capabilities. Our study assesses zero-shot and few-shot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19617v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19617v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19617v1-abstract-full" style="display: none;"> Plant stress phenotyping traditionally relies on expert assessments and specialized models, limiting scalability in agriculture. Recent advances in multimodal large language models (LLMs) offer potential solutions to this challenge. We present AgEval, a benchmark comprising 12 diverse plant stress phenotyping tasks, to evaluate these models' capabilities. Our study assesses zero-shot and few-shot in-context learning performance of state-of-the-art models, including Claude, GPT, Gemini, and LLaVA. Results show significant performance improvements with few-shot learning, with F1 scores increasing from 46.24% to 73.37% in 8-shot identification for the best-performing model. Few-shot examples from other classes in the dataset have negligible or negative impacts, although having the exact category example helps to increase performance by 15.38%. We also quantify the consistency of model performance across different classes within each task, finding that the coefficient of variance (CV) ranges from 26.02% to 58.03% across models, implying that subject matter expertise is needed - of 'difficult' classes - to achieve reliability in performance. AgEval establishes baseline metrics for multimodal LLMs in agricultural applications, offering insights into their promise for enhancing plant stress phenotyping at scale. Benchmark and code can be accessed at: https://anonymous.4open.science/r/AgEval/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19617v1-abstract-full').style.display = 'none'; document.getElementById('2407.19617v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13522">arXiv:2407.13522</a> <span> [<a href="https://arxiv.org/pdf/2407.13522">pdf</a>, <a href="https://arxiv.org/format/2407.13522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> INDIC QA BENCHMARK: A Multilingual Benchmark to Evaluate Question Answering capability of LLMs for Indic Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Murthy%2C+R">Rudra Murthy</a>, <a href="/search/cs?searchtype=author&query=kumar%2C+V">Vishwajeet kumar</a>, <a href="/search/cs?searchtype=author&query=Sen%2C+J">Jaydeep Sen</a>, <a href="/search/cs?searchtype=author&query=Ramakrishnan%2C+G">Ganesh Ramakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13522v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated remarkable zero-shot and few-shot capabilities in unseen tasks, including context-grounded question answering (QA) in English. However, the evaluation of LLMs' capabilities in non-English languages for context-based QA is limited by the scarcity of benchmarks in non-English languages. To address this gap, we introduce Indic-QA, the largest publicly av… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13522v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13522v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13522v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated remarkable zero-shot and few-shot capabilities in unseen tasks, including context-grounded question answering (QA) in English. However, the evaluation of LLMs' capabilities in non-English languages for context-based QA is limited by the scarcity of benchmarks in non-English languages. To address this gap, we introduce Indic-QA, the largest publicly available context-grounded question-answering dataset for 11 major Indian languages from two language families. The dataset comprises both extractive and abstractive question-answering tasks and includes existing datasets as well as English QA datasets translated into Indian languages. Additionally, we generate a synthetic dataset using the Gemini model to create question-answer pairs given a passage, which is then manually verified for quality assurance. We evaluate various multilingual Large Language Models and their instruction-fine-tuned variants on the benchmark and observe that their performance is subpar, particularly for low-resource languages. We hope that the release of this dataset will stimulate further research on the question-answering abilities of LLMs for low-resource languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13522v1-abstract-full').style.display = 'none'; document.getElementById('2407.13522v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00434">arXiv:2407.00434</a> <span> [<a href="https://arxiv.org/pdf/2407.00434">pdf</a>, <a href="https://arxiv.org/format/2407.00434">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Brevity is the soul of wit: Pruning long files for code generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yu Yang</a>, <a href="/search/cs?searchtype=author&query=Tirumala%2C+K">Kushal Tirumala</a>, <a href="/search/cs?searchtype=author&query=Elhoushi%2C+M">Mostafa Elhoushi</a>, <a href="/search/cs?searchtype=author&query=Morcos%2C+A+S">Ari S. Morcos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00434v1-abstract-short" style="display: inline;"> Data curation is commonly considered a "secret-sauce" for LLM training, with higher quality data usually leading to better LLM performance. Given the scale of internet-scraped corpora, data pruning has become a larger and larger focus. Specifically, many have shown that de-duplicating data, or sub-selecting higher quality data, can lead to efficiency or performance improvements. Generally, three t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00434v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00434v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00434v1-abstract-full" style="display: none;"> Data curation is commonly considered a "secret-sauce" for LLM training, with higher quality data usually leading to better LLM performance. Given the scale of internet-scraped corpora, data pruning has become a larger and larger focus. Specifically, many have shown that de-duplicating data, or sub-selecting higher quality data, can lead to efficiency or performance improvements. Generally, three types of methods are used to filter internet-scale corpora: embedding-based, heuristic-based, and classifier-based. In this work, we contrast the former two in the domain of finetuning LLMs for code generation. We find that embedding-based methods are often confounded by length, and that a simple heuristic--pruning long files--outperforms other methods in compute-limited regimes. Our method can yield up to a 2x efficiency benefit in training (while matching performance) or a 3.5% absolute performance improvement on HumanEval (while matching compute). However, we find that perplexity on held-out long files can increase, begging the question of whether optimizing data mixtures for common coding benchmarks (HumanEval, MBPP) actually best serves downstream use cases. Overall, we hope our work builds useful intuitions about code data (specifically, the low quality of extremely long code files) provides a compelling heuristic-based method for data pruning, and brings to light questions in how we evaluate code generation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00434v1-abstract-full').style.display = 'none'; document.getElementById('2407.00434v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17720">arXiv:2406.17720</a> <span> [<a href="https://arxiv.org/pdf/2406.17720">pdf</a>, <a href="https://arxiv.org/format/2406.17720">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Arboretum: A Large Multimodal Dataset Enabling AI for Biodiversity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chih-Hsuan Yang</a>, <a href="/search/cs?searchtype=author&query=Feuer%2C+B">Benjamin Feuer</a>, <a href="/search/cs?searchtype=author&query=Jubery%2C+Z">Zaki Jubery</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+Z+K">Zi K. Deng</a>, <a href="/search/cs?searchtype=author&query=Nakkab%2C+A">Andre Nakkab</a>, <a href="/search/cs?searchtype=author&query=Hasan%2C+M+Z">Md Zahid Hasan</a>, <a href="/search/cs?searchtype=author&query=Chiranjeevi%2C+S">Shivani Chiranjeevi</a>, <a href="/search/cs?searchtype=author&query=Marshall%2C+K">Kelly Marshall</a>, <a href="/search/cs?searchtype=author&query=Baishnab%2C+N">Nirmal Baishnab</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Asheesh K Singh</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Arti Singh</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Soumik Sarkar</a>, <a href="/search/cs?searchtype=author&query=Merchant%2C+N">Nirav Merchant</a>, <a href="/search/cs?searchtype=author&query=Hegde%2C+C">Chinmay Hegde</a>, <a href="/search/cs?searchtype=author&query=Ganapathysubramanian%2C+B">Baskar Ganapathysubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17720v1-abstract-short" style="display: inline;"> We introduce Arboretum, the largest publicly accessible dataset designed to advance AI for biodiversity applications. This dataset, curated from the iNaturalist community science platform and vetted by domain experts to ensure accuracy, includes 134.6 million images, surpassing existing datasets in scale by an order of magnitude. The dataset encompasses image-language paired data for a diverse set… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17720v1-abstract-full').style.display = 'inline'; document.getElementById('2406.17720v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17720v1-abstract-full" style="display: none;"> We introduce Arboretum, the largest publicly accessible dataset designed to advance AI for biodiversity applications. This dataset, curated from the iNaturalist community science platform and vetted by domain experts to ensure accuracy, includes 134.6 million images, surpassing existing datasets in scale by an order of magnitude. The dataset encompasses image-language paired data for a diverse set of species from birds (Aves), spiders/ticks/mites (Arachnida), insects (Insecta), plants (Plantae), fungus/mushrooms (Fungi), snails (Mollusca), and snakes/lizards (Reptilia), making it a valuable resource for multimodal vision-language AI models for biodiversity assessment and agriculture research. Each image is annotated with scientific names, taxonomic details, and common names, enhancing the robustness of AI model training. We showcase the value of Arboretum by releasing a suite of CLIP models trained using a subset of 40 million captioned images. We introduce several new benchmarks for rigorous assessment, report accuracy for zero-shot learning, and evaluations across life stages, rare species, confounding species, and various levels of the taxonomic hierarchy. We anticipate that Arboretum will spur the development of AI models that can enable a variety of digital tools ranging from pest control strategies, crop monitoring, and worldwide biodiversity assessment and environmental conservation. These advancements are critical for ensuring food security, preserving ecosystems, and mitigating the impacts of climate change. Arboretum is publicly available, easily accessible, and ready for immediate use. Please see the \href{https://baskargroup.github.io/Arboretum/}{project website} for links to our data, models, and code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17720v1-abstract-full').style.display = 'none'; document.getElementById('2406.17720v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17339">arXiv:2406.17339</a> <span> [<a href="https://arxiv.org/pdf/2406.17339">pdf</a>, <a href="https://arxiv.org/format/2406.17339">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Configuration Selection in Reconfigurable-Antenna MIMO Systems: Physics-Inspired Heuristic Solvers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Krikidis%2C+I">I. Krikidis</a>, <a href="/search/cs?searchtype=author&query=Psomas%2C+C">C. Psomas</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">A. K. Singh</a>, <a href="/search/cs?searchtype=author&query=Jamieson%2C+K">K. Jamieson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17339v1-abstract-short" style="display: inline;"> Reconfigurable antenna multiple-input multiple-output (MIMO) is a foundational technology for the continuing evolution of cellular systems, including upcoming 6G communication systems. In this paper, we address the problem of flexible/reconfigurable antenna configuration selection for point-to-point MIMO antenna systems by using physics-inspired heuristics. Firstly, we optimize the antenna configu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17339v1-abstract-full').style.display = 'inline'; document.getElementById('2406.17339v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17339v1-abstract-full" style="display: none;"> Reconfigurable antenna multiple-input multiple-output (MIMO) is a foundational technology for the continuing evolution of cellular systems, including upcoming 6G communication systems. In this paper, we address the problem of flexible/reconfigurable antenna configuration selection for point-to-point MIMO antenna systems by using physics-inspired heuristics. Firstly, we optimize the antenna configuration to maximize the signal-to-noise ratio (SNR) at the receiver by leveraging two basic heuristic solvers, i.e., coherent Ising machines (CIMs), that mimic quantum mechanical dynamics, and quantum annealing (QA), where a real-world QA architecture is considered (D-Wave). A mathematical framework that converts the configuration selection problem into CIM- and QA- compatible unconstrained quadratic formulations is investigated. Numerical and experimental results show that the proposed designs outperform classical counterparts and achieve near-optimal performance (similar to exhaustive search with exponential complexity) while ensuring polynomial complexity. Moreover, we study the optimal antenna configuration that maximizes the end-to-end Shannon capacity. A simulated annealing (SA) heuristic which achieves near-optimal performance through appropriate parameterization is adopted. A modified version of the basic SA that exploits parallel tempering to avoid local maxima is also studied, which provides additional performance gains. Extended numerical studies show that the SA solutions outperform conventional heuristics (which are also developed for comparison purposes), while the employment of the SNR-based solutions is highly sub-optimal. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17339v1-abstract-full').style.display = 'none'; document.getElementById('2406.17339v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2403.12571</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Communications, 2004 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16176">arXiv:2406.16176</a> <span> [<a href="https://arxiv.org/pdf/2406.16176">pdf</a>, <a href="https://arxiv.org/format/2406.16176">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GraphEval2000: Benchmarking and Improving Large Language Models on Graph Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qiming Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zichen Chen</a>, <a href="/search/cs?searchtype=author&query=Corcoran%2C+W">Will Corcoran</a>, <a href="/search/cs?searchtype=author&query=Sra%2C+M">Misha Sra</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ambuj K. Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16176v1-abstract-short" style="display: inline;"> Large language models (LLMs) have achieved remarkable success in natural language processing (NLP), demonstrating significant capabilities in processing and understanding text data. However, recent studies have identified limitations in LLMs' ability to reason about graph-structured data. To address this gap, we introduce GraphEval2000, the first comprehensive graph dataset, comprising 40 graph da… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16176v1-abstract-full').style.display = 'inline'; document.getElementById('2406.16176v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16176v1-abstract-full" style="display: none;"> Large language models (LLMs) have achieved remarkable success in natural language processing (NLP), demonstrating significant capabilities in processing and understanding text data. However, recent studies have identified limitations in LLMs' ability to reason about graph-structured data. To address this gap, we introduce GraphEval2000, the first comprehensive graph dataset, comprising 40 graph data structure problems along with 2000 test cases. Additionally, we introduce an evaluation framework based on GraphEval2000, designed to assess the graph reasoning abilities of LLMs through coding challenges. Our dataset categorizes test cases into four primary and four sub-categories, ensuring a comprehensive evaluation. We evaluate eight popular LLMs on GraphEval2000, revealing that LLMs exhibit a better understanding of directed graphs compared to undirected ones. While private LLMs consistently outperform open-source models, the performance gap is narrowing. Furthermore, to improve the usability of our evaluation framework, we propose Structured Symbolic Decomposition (SSD), an instruction-based method designed to enhance LLM performance on GraphEval2000. Results show that SSD improves the performance of GPT-3.5, GPT-4, and GPT-4o on complex graph problems, with an increase of 11.11\%, 33.37\%, and 33.37\%, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16176v1-abstract-full').style.display = 'none'; document.getElementById('2406.16176v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to NeurIPs 2024 Dataset and Benchmark track, under review</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> H.2.8; I.2.6; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14639">arXiv:2406.14639</a> <span> [<a href="https://arxiv.org/pdf/2406.14639">pdf</a>, <a href="https://arxiv.org/format/2406.14639">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Differentiable-Optimization Based Neural Policy for Occlusion-Aware Target Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Masnavi%2C+H">Houman Masnavi</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Janabi-Sharifi%2C+F">Farrokh Janabi-Sharifi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14639v1-abstract-short" style="display: inline;"> Tracking a target in cluttered and dynamic environments is challenging but forms a core component in applications like aerial cinematography. The obstacles in the environment not only pose collision risk but can also occlude the target from the field-of-view of the robot. Moreover, the target future trajectory may be unknown and only its current state can be estimated. In this paper, we propose a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14639v1-abstract-full').style.display = 'inline'; document.getElementById('2406.14639v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14639v1-abstract-full" style="display: none;"> Tracking a target in cluttered and dynamic environments is challenging but forms a core component in applications like aerial cinematography. The obstacles in the environment not only pose collision risk but can also occlude the target from the field-of-view of the robot. Moreover, the target future trajectory may be unknown and only its current state can be estimated. In this paper, we propose a learned probabilistic neural policy for safe, occlusion-free target tracking. The core novelty of our work stems from the structure of our policy network that combines generative modeling based on Conditional Variational Autoencoder (CVAE) with differentiable optimization layers. The role of the CVAE is to provide a base trajectory distribution which is then projected onto a learned feasible set through the optimization layer. Furthermore, both the weights of the CVAE network and the parameters of the differentiable optimization can be learned in an end-to-end fashion through demonstration trajectories. We improve the state-of-the-art (SOTA) in the following respects. We show that our learned policy outperforms existing SOTA in terms of occlusion/collision avoidance capabilities and computation time. Second, we present an extensive ablation showing how different components of our learning pipeline contribute to the overall tracking task. We also demonstrate the real-time performance of our approach on resource-constrained hardware such as NVIDIA Jetson TX2. Finally, our learned policy can also be viewed as a reactive planner for navigation in highly cluttered environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14639v1-abstract-full').style.display = 'none'; document.getElementById('2406.14639v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13081">arXiv:2406.13081</a> <span> [<a href="https://arxiv.org/pdf/2406.13081">pdf</a>, <a href="https://arxiv.org/format/2406.13081">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Class-specific Data Augmentation for Plant Stress Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Saleem%2C+N">Nasla Saleem</a>, <a href="/search/cs?searchtype=author&query=Balu%2C+A">Aditya Balu</a>, <a href="/search/cs?searchtype=author&query=Jubery%2C+T+Z">Talukder Zaki Jubery</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Arti Singh</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Asheesh K. Singh</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Soumik Sarkar</a>, <a href="/search/cs?searchtype=author&query=Ganapathysubramanian%2C+B">Baskar Ganapathysubramanian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13081v1-abstract-short" style="display: inline;"> Data augmentation is a powerful tool for improving deep learning-based image classifiers for plant stress identification and classification. However, selecting an effective set of augmentations from a large pool of candidates remains a key challenge, particularly in imbalanced and confounding datasets. We propose an approach for automated class-specific data augmentation using a genetic algorithm.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13081v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13081v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13081v1-abstract-full" style="display: none;"> Data augmentation is a powerful tool for improving deep learning-based image classifiers for plant stress identification and classification. However, selecting an effective set of augmentations from a large pool of candidates remains a key challenge, particularly in imbalanced and confounding datasets. We propose an approach for automated class-specific data augmentation using a genetic algorithm. We demonstrate the utility of our approach on soybean [Glycine max (L.) Merr] stress classification where symptoms are observed on leaves; a particularly challenging problem due to confounding classes in the dataset. Our approach yields substantial performance, achieving a mean-per-class accuracy of 97.61% and an overall accuracy of 98% on the soybean leaf stress dataset. Our method significantly improves the accuracy of the most challenging classes, with notable enhancements from 83.01% to 88.89% and from 85.71% to 94.05%, respectively. A key observation we make in this study is that high-performing augmentation strategies can be identified in a computationally efficient manner. We fine-tune only the linear layer of the baseline model with different augmentations, thereby reducing the computational burden associated with training classifiers from scratch for each augmentation policy while achieving exceptional performance. This research represents an advancement in automated data augmentation strategies for plant stress classification, particularly in the context of confounding datasets. Our findings contribute to the growing body of research in tailored augmentation techniques and their potential impact on disease management strategies, crop yields, and global food security. The proposed approach holds the potential to enhance the accuracy and efficiency of deep learning-based tools for managing plant stresses in agriculture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13081v1-abstract-full').style.display = 'none'; document.getElementById('2406.13081v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10229">arXiv:2406.10229</a> <span> [<a href="https://arxiv.org/pdf/2406.10229">pdf</a>, <a href="https://arxiv.org/format/2406.10229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Quantifying Variance in Evaluation Benchmarks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Madaan%2C+L">Lovish Madaan</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Schaeffer%2C+R">Rylan Schaeffer</a>, <a href="/search/cs?searchtype=author&query=Poulton%2C+A">Andrew Poulton</a>, <a href="/search/cs?searchtype=author&query=Koyejo%2C+S">Sanmi Koyejo</a>, <a href="/search/cs?searchtype=author&query=Stenetorp%2C+P">Pontus Stenetorp</a>, <a href="/search/cs?searchtype=author&query=Narang%2C+S">Sharan Narang</a>, <a href="/search/cs?searchtype=author&query=Hupkes%2C+D">Dieuwke Hupkes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10229v1-abstract-short" style="display: inline;"> Evaluation benchmarks are the cornerstone of measuring capabilities of large language models (LLMs), as well as driving progress in said capabilities. Originally designed to make claims about capabilities (or lack thereof) in fully pretrained models, evaluation benchmarks are now also extensively used to decide between various training choices. Despite this widespread usage, we rarely quantify the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10229v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10229v1-abstract-full" style="display: none;"> Evaluation benchmarks are the cornerstone of measuring capabilities of large language models (LLMs), as well as driving progress in said capabilities. Originally designed to make claims about capabilities (or lack thereof) in fully pretrained models, evaluation benchmarks are now also extensively used to decide between various training choices. Despite this widespread usage, we rarely quantify the variance in our evaluation benchmarks, which dictates whether differences in performance are meaningful. Here, we define and measure a range of metrics geared towards measuring variance in evaluation benchmarks, including seed variance across initialisations, and monotonicity during training. By studying a large number of models -- both openly available and pretrained from scratch -- we provide empirical estimates for a variety of variance metrics, with considerations and recommendations for practitioners. We also evaluate the utility and tradeoffs of continuous versus discrete performance measures and explore options for better understanding and reducing this variance. We find that simple changes, such as framing choice tasks (like MMLU) as completion tasks, can often reduce variance for smaller scale ($\sim$7B) models, while more involved methods inspired from human testing literature (such as item analysis and item response theory) struggle to meaningfully reduce variance. Overall, our work provides insights into variance in evaluation benchmarks, suggests LM-specific techniques to reduce variance, and more generally encourages practitioners to carefully factor in variance when comparing models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10229v1-abstract-full').style.display = 'none'; document.getElementById('2406.10229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.15766">arXiv:2405.15766</a> <span> [<a href="https://arxiv.org/pdf/2405.15766">pdf</a>, <a href="https://arxiv.org/format/2405.15766">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2024.findings-acl.667">10.18653/v1/2024.findings-acl.667 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Enhancing Adverse Drug Event Detection with Multimodal Dataset: Corpus Creation and Model Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sahoo%2C+P">Pranab Sahoo</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ayush Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Saha%2C+S">Sriparna Saha</a>, <a href="/search/cs?searchtype=author&query=Chadha%2C+A">Aman Chadha</a>, <a href="/search/cs?searchtype=author&query=Mondal%2C+S">Samrat Mondal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.15766v2-abstract-short" style="display: inline;"> The mining of adverse drug events (ADEs) is pivotal in pharmacovigilance, enhancing patient safety by identifying potential risks associated with medications, facilitating early detection of adverse events, and guiding regulatory decision-making. Traditional ADE detection methods are reliable but slow, not easily adaptable to large-scale operations, and offer limited information. With the exponent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15766v2-abstract-full').style.display = 'inline'; document.getElementById('2405.15766v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.15766v2-abstract-full" style="display: none;"> The mining of adverse drug events (ADEs) is pivotal in pharmacovigilance, enhancing patient safety by identifying potential risks associated with medications, facilitating early detection of adverse events, and guiding regulatory decision-making. Traditional ADE detection methods are reliable but slow, not easily adaptable to large-scale operations, and offer limited information. With the exponential increase in data sources like social media content, biomedical literature, and Electronic Medical Records (EMR), extracting relevant ADE-related information from these unstructured texts is imperative. Previous ADE mining studies have focused on text-based methodologies, overlooking visual cues, limiting contextual comprehension, and hindering accurate interpretation. To address this gap, we present a MultiModal Adverse Drug Event (MMADE) detection dataset, merging ADE-related textual information with visual aids. Additionally, we introduce a framework that leverages the capabilities of LLMs and VLMs for ADE detection by generating detailed descriptions of medical images depicting ADEs, aiding healthcare professionals in visually identifying adverse events. Using our MMADE dataset, we showcase the significance of integrating visual cues from images to enhance overall performance. This approach holds promise for patient safety, ADE awareness, and healthcare accessibility, paving the way for further exploration in personalized healthcare. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.15766v2-abstract-full').style.display = 'none'; document.getElementById('2405.15766v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL Findings 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> 2024.findings-acl.667 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11487">arXiv:2405.11487</a> <span> [<a href="https://arxiv.org/pdf/2405.11487">pdf</a>, <a href="https://arxiv.org/format/2405.11487">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> "Previously on ..." From Recaps to Story Summarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aditya Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Srivastava%2C+D">Dhruv Srivastava</a>, <a href="/search/cs?searchtype=author&query=Tapaswi%2C+M">Makarand Tapaswi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11487v1-abstract-short" style="display: inline;"> We introduce multimodal story summarization by leveraging TV episode recaps - short video sequences interweaving key story moments from previous episodes to bring viewers up to speed. We propose PlotSnap, a dataset featuring two crime thriller TV shows with rich recaps and long episodes of 40 minutes. Story summarization labels are unlocked by matching recap shots to corresponding sub-stories in t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11487v1-abstract-full').style.display = 'inline'; document.getElementById('2405.11487v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11487v1-abstract-full" style="display: none;"> We introduce multimodal story summarization by leveraging TV episode recaps - short video sequences interweaving key story moments from previous episodes to bring viewers up to speed. We propose PlotSnap, a dataset featuring two crime thriller TV shows with rich recaps and long episodes of 40 minutes. Story summarization labels are unlocked by matching recap shots to corresponding sub-stories in the episode. We propose a hierarchical model TaleSumm that processes entire episodes by creating compact shot and dialog representations, and predicts importance scores for each video shot and dialog utterance by enabling interactions between local story groups. Unlike traditional summarization, our method extracts multiple plot points from long videos. We present a thorough evaluation on story summarization, including promising cross-series generalization. TaleSumm also shows good results on classic video summarization benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11487v1-abstract-full').style.display = 'none'; document.getElementById('2405.11487v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2024; Project page: https://katha-ai.github.io/projects/recap-story-summ/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11200">arXiv:2405.11200</a> <span> [<a href="https://arxiv.org/pdf/2405.11200">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LexGen: Domain-aware Multilingual Lexicon Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Maheshwari%2C+A">Ayush Maheshwari</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Atul Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=NJ%2C+K">Karthika NJ</a>, <a href="/search/cs?searchtype=author&query=Bhatt%2C+K">Krishnakant Bhatt</a>, <a href="/search/cs?searchtype=author&query=Jyothi%2C+P">Preethi Jyothi</a>, <a href="/search/cs?searchtype=author&query=Ramakrishnan%2C+G">Ganesh Ramakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11200v2-abstract-short" style="display: inline;"> Lexicon or dictionary generation across domains is of significant societal importance, as it can potentially enhance information accessibility for a diverse user base while preserving language identity. Prior work in the field primarily focuses on bilingual lexical induction, which deals with word alignments using mapping-based or corpora-based approaches. Though initiated by researchers, the rese… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11200v2-abstract-full').style.display = 'inline'; document.getElementById('2405.11200v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11200v2-abstract-full" style="display: none;"> Lexicon or dictionary generation across domains is of significant societal importance, as it can potentially enhance information accessibility for a diverse user base while preserving language identity. Prior work in the field primarily focuses on bilingual lexical induction, which deals with word alignments using mapping-based or corpora-based approaches. Though initiated by researchers, the research associated with lexicon generation is limited, even more so with domain-specific lexicons. This task becomes particularly important in atypical medical, engineering, and other technical domains, owing to the highly infrequent usage of the terms and negligibly low data availability of technical terms in many low-resource languages. Owing to the research gap in lexicon generation, especially with a limited focus on the domain-specific area, we propose a new model to generate dictionary words for 6 Indian languages in the multi-domain setting. Our model consists of domain-specific and domain-generic layers that encode information, and these layers are invoked via a learnable routing technique. Further, we propose an approach to explicitly leverage the relatedness between these Indian languages toward coherent translation. We also release a new benchmark dataset across 6 Indian languages that span 8 diverse domains that can propel further research in domain-specific lexicon induction. We conduct both zero-shot and few-shot experiments across multiple domains to show the efficacy of our proposed model in generalizing to unseen domains and unseen languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11200v2-abstract-full').style.display = 'none'; document.getElementById('2405.11200v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.18591">arXiv:2404.18591</a> <span> [<a href="https://arxiv.org/pdf/2404.18591">pdf</a>, <a href="https://arxiv.org/format/2404.18591">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FashionSD-X: Multimodal Fashion Garment Synthesis using Latent Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Patras%2C+I">Ioannis Patras</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.18591v1-abstract-short" style="display: inline;"> The rapid evolution of the fashion industry increasingly intersects with technological advancements, particularly through the integration of generative AI. This study introduces a novel generative pipeline designed to transform the fashion design process by employing latent diffusion models. Utilizing ControlNet and LoRA fine-tuning, our approach generates high-quality images from multimodal input… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.18591v1-abstract-full').style.display = 'inline'; document.getElementById('2404.18591v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.18591v1-abstract-full" style="display: none;"> The rapid evolution of the fashion industry increasingly intersects with technological advancements, particularly through the integration of generative AI. This study introduces a novel generative pipeline designed to transform the fashion design process by employing latent diffusion models. Utilizing ControlNet and LoRA fine-tuning, our approach generates high-quality images from multimodal inputs such as text and sketches. We leverage and enhance state-of-the-art virtual try-on datasets, including Multimodal Dress Code and VITON-HD, by integrating sketch data. Our evaluation, utilizing metrics like FID, CLIP Score, and KID, demonstrates that our model significantly outperforms traditional stable diffusion models. The results not only highlight the effectiveness of our model in generating fashion-appropriate outputs but also underscore the potential of diffusion models in revolutionizing fashion design workflows. This research paves the way for more interactive, personalized, and technologically enriched methodologies in fashion design and representation, bridging the gap between creative vision and practical application. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.18591v1-abstract-full').style.display = 'none'; document.getElementById('2404.18591v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07129">arXiv:2404.07129</a> <span> [<a href="https://arxiv.org/pdf/2404.07129">pdf</a>, <a href="https://arxiv.org/format/2404.07129">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> What needs to go right for an induction head? A mechanistic study of in-context learning circuits and their formation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Moskovitz%2C+T">Ted Moskovitz</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+F">Felix Hill</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+S+C+Y">Stephanie C. Y. Chan</a>, <a href="/search/cs?searchtype=author&query=Saxe%2C+A+M">Andrew M. Saxe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07129v1-abstract-short" style="display: inline;"> In-context learning is a powerful emergent ability in transformer models. Prior work in mechanistic interpretability has identified a circuit element that may be critical for in-context learning -- the induction head (IH), which performs a match-and-copy operation. During training of large transformers on natural language data, IHs emerge around the same time as a notable phase change in the loss.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07129v1-abstract-full').style.display = 'inline'; document.getElementById('2404.07129v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07129v1-abstract-full" style="display: none;"> In-context learning is a powerful emergent ability in transformer models. Prior work in mechanistic interpretability has identified a circuit element that may be critical for in-context learning -- the induction head (IH), which performs a match-and-copy operation. During training of large transformers on natural language data, IHs emerge around the same time as a notable phase change in the loss. Despite the robust evidence for IHs and this interesting coincidence with the phase change, relatively little is known about the diversity and emergence dynamics of IHs. Why is there more than one IH, and how are they dependent on each other? Why do IHs appear all of a sudden, and what are the subcircuits that enable them to emerge? We answer these questions by studying IH emergence dynamics in a controlled setting by training on synthetic data. In doing so, we develop and share a novel optogenetics-inspired causal framework for modifying activations throughout training. Using this framework, we delineate the diverse and additive nature of IHs. By clamping subsets of activations throughout training, we then identify three underlying subcircuits that interact to drive IH formation, yielding the phase change. Furthermore, these subcircuits shed light on data-dependent properties of formation, such as phase change timing, already showing the promise of this more in-depth understanding of subcircuits that need to "go right" for an induction head. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07129v1-abstract-full').style.display = 'none'; document.getElementById('2404.07129v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 18 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.05631">arXiv:2404.05631</a> <span> [<a href="https://arxiv.org/pdf/2404.05631">pdf</a>, <a href="https://arxiv.org/format/2404.05631">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Multi Digit Ising Mapping for Low Precision Ising Solvers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Jamieson%2C+K">Kyle Jamieson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.05631v1-abstract-short" style="display: inline;"> The last couple of years have seen an ever-increasing interest in using different Ising solvers, like Quantum annealers, Coherent Ising machines, and Oscillator-based Ising machines, for solving tough computational problems in various domains. Although the simulations predict massive performance improvements for several tough computational problems, the real implementations of the Ising solvers te… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05631v1-abstract-full').style.display = 'inline'; document.getElementById('2404.05631v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.05631v1-abstract-full" style="display: none;"> The last couple of years have seen an ever-increasing interest in using different Ising solvers, like Quantum annealers, Coherent Ising machines, and Oscillator-based Ising machines, for solving tough computational problems in various domains. Although the simulations predict massive performance improvements for several tough computational problems, the real implementations of the Ising solvers tend to have limited precision, which can cause significant performance deterioration. This paper presents a novel methodology for mapping the problem on the Ising solvers to artificially increase the effective precision. We further evaluate our method for the Multiple-Input-Multiple-Output signal detection problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.05631v1-abstract-full').style.display = 'none'; document.getElementById('2404.05631v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">version 1.0</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.03307">arXiv:2404.03307</a> <span> [<a href="https://arxiv.org/pdf/2404.03307">pdf</a>, <a href="https://arxiv.org/format/2404.03307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Bi-level Trajectory Optimization on Uneven Terrains with Differentiable Wheel-Terrain Interaction Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Manoharan%2C+A">Amith Manoharan</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Aditya Sharma</a>, <a href="/search/cs?searchtype=author&query=Belsare%2C+H">Himani Belsare</a>, <a href="/search/cs?searchtype=author&query=Pal%2C+K">Kaustab Pal</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+K+M">K. Madhava Krishna</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.03307v2-abstract-short" style="display: inline;"> Navigation of wheeled vehicles on uneven terrain necessitates going beyond the 2D approaches for trajectory planning. Specifically, it is essential to incorporate the full 6dof variation of vehicle pose and its associated stability cost in the planning process. To this end, most recent works aim to learn a neural network model to predict the vehicle evolution. However, such approaches are data-int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03307v2-abstract-full').style.display = 'inline'; document.getElementById('2404.03307v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.03307v2-abstract-full" style="display: none;"> Navigation of wheeled vehicles on uneven terrain necessitates going beyond the 2D approaches for trajectory planning. Specifically, it is essential to incorporate the full 6dof variation of vehicle pose and its associated stability cost in the planning process. To this end, most recent works aim to learn a neural network model to predict the vehicle evolution. However, such approaches are data-intensive and fraught with generalization issues. In this paper, we present a purely model-based approach that just requires the digital elevation information of the terrain. Specifically, we express the wheel-terrain interaction and 6dof pose prediction as a non-linear least squares (NLS) problem. As a result, trajectory planning can be viewed as a bi-level optimization. The inner optimization layer predicts the pose on the terrain along a given trajectory, while the outer layer deforms the trajectory itself to reduce the stability and kinematic costs of the pose. We improve the state-of-the-art in the following respects. First, we show that our NLS based pose prediction closely matches the output from a high-fidelity physics engine. This result coupled with the fact that we can query gradients of the NLS solver, makes our pose predictor, a differentiable wheel-terrain interaction model. We further leverage this differentiability to efficiently solve the proposed bi-level trajectory optimization problem. Finally, we perform extensive experiments, and comparison with a baseline to showcase the effectiveness of our approach in obtaining smooth, stable trajectories. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03307v2-abstract-full').style.display = 'none'; document.getElementById('2404.03307v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 7 figures, submitted to IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.20116">arXiv:2403.20116</a> <span> [<a href="https://arxiv.org/pdf/2403.20116">pdf</a>, <a href="https://arxiv.org/format/2403.20116">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> LeGo-Drive: Language-enhanced Goal-oriented Closed-Loop End-to-End Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Paul%2C+P">Pranjal Paul</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+A">Anant Garg</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+T">Tushar Choudhary</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+K+M">K. Madhava Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.20116v1-abstract-short" style="display: inline;"> Existing Vision-Language models (VLMs) estimate either long-term trajectory waypoints or a set of control actions as a reactive solution for closed-loop planning based on their rich scene comprehension. However, these estimations are coarse and are subjective to their "world understanding" which may generate sub-optimal decisions due to perception errors. In this paper, we introduce LeGo-Drive, wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.20116v1-abstract-full').style.display = 'inline'; document.getElementById('2403.20116v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.20116v1-abstract-full" style="display: none;"> Existing Vision-Language models (VLMs) estimate either long-term trajectory waypoints or a set of control actions as a reactive solution for closed-loop planning based on their rich scene comprehension. However, these estimations are coarse and are subjective to their "world understanding" which may generate sub-optimal decisions due to perception errors. In this paper, we introduce LeGo-Drive, which aims to address this issue by estimating a goal location based on the given language command as an intermediate representation in an end-to-end setting. The estimated goal might fall in a non-desirable region, like on top of a car for a parking-like command, leading to inadequate planning. Hence, we propose to train the architecture in an end-to-end manner, resulting in iterative refinement of both the goal and the trajectory collectively. We validate the effectiveness of our method through comprehensive experiments conducted in diverse simulated environments. We report significant improvements in standard autonomous driving metrics, with a goal reaching Success Rate of 81%. We further showcase the versatility of LeGo-Drive across different driving scenarios and linguistic inputs, underscoring its potential for practical deployment in autonomous vehicles and intelligent transportation systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.20116v1-abstract-full').style.display = 'none'; document.getElementById('2403.20116v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.19461">arXiv:2403.19461</a> <span> [<a href="https://arxiv.org/pdf/2403.19461">pdf</a>, <a href="https://arxiv.org/format/2403.19461">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Learning Sampling Distribution and Safety Filter for Autonomous Driving with VQ-VAE and Differentiable Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Idoko%2C+S">Simon Idoko</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+B">Basant Sharma</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.19461v2-abstract-short" style="display: inline;"> Sampling trajectories from a distribution followed by ranking them based on a specified cost function is a common approach in autonomous driving. Typically, the sampling distribution is hand-crafted (e.g a Gaussian, or a grid). Recently, there have been efforts towards learning the sampling distribution through generative models such as Conditional Variational Autoencoder (CVAE). However, these ap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19461v2-abstract-full').style.display = 'inline'; document.getElementById('2403.19461v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.19461v2-abstract-full" style="display: none;"> Sampling trajectories from a distribution followed by ranking them based on a specified cost function is a common approach in autonomous driving. Typically, the sampling distribution is hand-crafted (e.g a Gaussian, or a grid). Recently, there have been efforts towards learning the sampling distribution through generative models such as Conditional Variational Autoencoder (CVAE). However, these approaches fail to capture the multi-modality of the driving behaviour due to the Gaussian latent prior of the CVAE. Thus, in this paper, we re-imagine the distribution learning through vector quantized variational autoencoder (VQ-VAE), whose discrete latent-space is well equipped to capture multi-modal sampling distribution. The VQ-VAE is trained with demonstration data of optimal trajectories. We further propose a differentiable optimization based safety filter to minimally correct the VQVAE sampled trajectories to ensure collision avoidance. We use backpropagation through the optimization layers in a self-supervised learning set-up to learn good initialization and optimal parameters of the safety filter. We perform extensive comparisons with state-of-the-art CVAE-based baseline in dense and aggressive traffic scenarios and show a reduction of up to 12 times in collision-rate while being competitive in driving speeds. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.19461v2-abstract-full').style.display = 'none'; document.getElementById('2403.19461v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.16592">arXiv:2403.16592</a> <span> [<a href="https://arxiv.org/pdf/2403.16592">pdf</a>, <a href="https://arxiv.org/format/2403.16592">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TrustAI at SemEval-2024 Task 8: A Comprehensive Analysis of Multi-domain Machine Generated Text Detection Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Urlana%2C+A">Ashok Urlana</a>, <a href="/search/cs?searchtype=author&query=Saibewar%2C+A">Aditya Saibewar</a>, <a href="/search/cs?searchtype=author&query=Garlapati%2C+B+M">Bala Mallikarjunarao Garlapati</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+C+V">Charaka Vinayak Kumar</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ajeet Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Chalamala%2C+S+R">Srinivasa Rao Chalamala</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.16592v1-abstract-short" style="display: inline;"> The Large Language Models (LLMs) exhibit remarkable ability to generate fluent content across a wide spectrum of user queries. However, this capability has raised concerns regarding misinformation and personal information leakage. In this paper, we present our methods for the SemEval2024 Task8, aiming to detect machine-generated text across various domains in both mono-lingual and multi-lingual co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16592v1-abstract-full').style.display = 'inline'; document.getElementById('2403.16592v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.16592v1-abstract-full" style="display: none;"> The Large Language Models (LLMs) exhibit remarkable ability to generate fluent content across a wide spectrum of user queries. However, this capability has raised concerns regarding misinformation and personal information leakage. In this paper, we present our methods for the SemEval2024 Task8, aiming to detect machine-generated text across various domains in both mono-lingual and multi-lingual contexts. Our study comprehensively analyzes various methods to detect machine-generated text, including statistical, neural, and pre-trained model approaches. We also detail our experimental setup and perform a in-depth error analysis to evaluate the effectiveness of these methods. Our methods obtain an accuracy of 86.9\% on the test set of subtask-A mono and 83.7\% for subtask-B. Furthermore, we also highlight the challenges and essential factors for consideration in future studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16592v1-abstract-full').style.display = 'none'; document.getElementById('2403.16592v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 1 Figure</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12571">arXiv:2403.12571</a> <span> [<a href="https://arxiv.org/pdf/2403.12571">pdf</a>, <a href="https://arxiv.org/format/2403.12571">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Reconfigurable Antenna MIMO Systems with Coherent Ising Machines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Krikidis%2C+I">Ioannis Krikidis</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Jamieson%2C+K">Kyle Jamieson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12571v1-abstract-short" style="display: inline;"> Reconfigurable antenna multiple-input multiple-output (MIMO) is a promising technology for upcoming 6G communication systems. In this paper, we deal with the problem of configuration selection for reconfigurable antenna MIMO by leveraging Coherent Ising Machines (CIMs). By adopting the CIM as a heuristic solver for the Ising problem, the optimal antenna configuration that maximizes the received si… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12571v1-abstract-full').style.display = 'inline'; document.getElementById('2403.12571v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12571v1-abstract-full" style="display: none;"> Reconfigurable antenna multiple-input multiple-output (MIMO) is a promising technology for upcoming 6G communication systems. In this paper, we deal with the problem of configuration selection for reconfigurable antenna MIMO by leveraging Coherent Ising Machines (CIMs). By adopting the CIM as a heuristic solver for the Ising problem, the optimal antenna configuration that maximizes the received signal-to-noise ratio is investigated. A mathematical framework that converts the selection problem into a CIM-compatible unconstrained quadratic formulation is presented. Numerical studies show that the proposed CIM-based design outperforms classical counterparts and achieves near-optimal performance (similar to exponentially complex exhaustive searching) while ensuring polynomial complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12571v1-abstract-full').style.display = 'none'; document.getElementById('2403.12571v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE International Conference on Communications (ICC), June 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18778">arXiv:2402.18778</a> <span> [<a href="https://arxiv.org/pdf/2402.18778">pdf</a>, <a href="https://arxiv.org/format/2402.18778">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> X-ResQ: Reverse Annealing for Quantum MIMO Detection with Flexible Parallelism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+M">Minsung Kim</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Abhishek Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Venturelli%2C+D">Davide Venturelli</a>, <a href="/search/cs?searchtype=author&query=Kaewell%2C+J">John Kaewell</a>, <a href="/search/cs?searchtype=author&query=Jamieson%2C+K">Kyle Jamieson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18778v2-abstract-short" style="display: inline;"> Quantum Annealing (QA)-accelerated MIMO detection is an emerging research approach in the context of NextG wireless networks. The opportunity is to enable large MIMO systems and thus improve wireless performance. The approach aims to leverage QA to expedite the computation required for theoretically optimal but computationally-demanding Maximum Likelihood detection to overcome the limitations of t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18778v2-abstract-full').style.display = 'inline'; document.getElementById('2402.18778v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18778v2-abstract-full" style="display: none;"> Quantum Annealing (QA)-accelerated MIMO detection is an emerging research approach in the context of NextG wireless networks. The opportunity is to enable large MIMO systems and thus improve wireless performance. The approach aims to leverage QA to expedite the computation required for theoretically optimal but computationally-demanding Maximum Likelihood detection to overcome the limitations of the currently deployed linear detectors. This paper presents X-ResQ, a QA-based MIMO detector system featuring fine-grained quantum task parallelism that is uniquely enabled by the Reverse Annealing (RA) protocol. Unlike prior designs, X-ResQ has many desirable system properties for a parallel QA detector and has effectively improved detection performance as more qubits are assigned. In our evaluations on a state-of-the-art quantum annealer, fully parallel X-ResQ achieves near-optimal throughput (over 10 bits/s/Hz) for $4\times6$ MIMO with 16-QAM using six levels of parallelism with 240 qubits and $220~渭$s QA compute time, achieving 2.5--5$\times$ gains compared against other tested detectors. For more comprehensive evaluations, we implement and evaluate X-ResQ in the non-quantum digital setting. This non-quantum X-ResQ demonstration showcases the potential to realize ultra-large $1024\times1024$ MIMO, significantly outperforming other MIMO detectors, including the state-of-the-art RA detector classically implemented in the same way. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18778v2-abstract-full').style.display = 'none'; document.getElementById('2402.18778v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18751">arXiv:2402.18751</a> <span> [<a href="https://arxiv.org/pdf/2402.18751">pdf</a>, <a href="https://arxiv.org/format/2402.18751">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Sensor and Multi-temporal High-Throughput Phenotyping for Monitoring and Early Detection of Water-Limiting Stress in Soybean </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jones%2C+S+E">Sarah E. Jones</a>, <a href="/search/cs?searchtype=author&query=Ayanlade%2C+T">Timilehin Ayanlade</a>, <a href="/search/cs?searchtype=author&query=Fallen%2C+B">Benjamin Fallen</a>, <a href="/search/cs?searchtype=author&query=Jubery%2C+T+Z">Talukder Z. Jubery</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Arti Singh</a>, <a href="/search/cs?searchtype=author&query=Ganapathysubramanian%2C+B">Baskar Ganapathysubramanian</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Soumik Sarkar</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Asheesh K. Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18751v1-abstract-short" style="display: inline;"> Soybean production is susceptible to biotic and abiotic stresses, exacerbated by extreme weather events. Water limiting stress, i.e. drought, emerges as a significant risk for soybean production, underscoring the need for advancements in stress monitoring for crop breeding and production. This project combines multi-modal information to identify the most effective and efficient automated methods t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18751v1-abstract-full').style.display = 'inline'; document.getElementById('2402.18751v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18751v1-abstract-full" style="display: none;"> Soybean production is susceptible to biotic and abiotic stresses, exacerbated by extreme weather events. Water limiting stress, i.e. drought, emerges as a significant risk for soybean production, underscoring the need for advancements in stress monitoring for crop breeding and production. This project combines multi-modal information to identify the most effective and efficient automated methods to investigate drought response. We investigated a set of diverse soybean accessions using multiple sensors in a time series high-throughput phenotyping manner to: (1) develop a pipeline for rapid classification of soybean drought stress symptoms, and (2) investigate methods for early detection of drought stress. We utilized high-throughput time-series phenotyping using UAVs and sensors in conjunction with machine learning (ML) analytics, which offered a swift and efficient means of phenotyping. The red-edge and green bands were most effective to classify canopy wilting stress. The Red-Edge Chlorophyll Vegetation Index (RECI) successfully differentiated susceptible and tolerant soybean accessions prior to visual symptom development. We report pre-visual detection of soybean wilting using a combination of different vegetation indices. These results can contribute to early stress detection methodologies and rapid classification of drought responses in screening nurseries for breeding and production applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18751v1-abstract-full').style.display = 'none'; document.getElementById('2402.18751v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.14903">arXiv:2402.14903</a> <span> [<a href="https://arxiv.org/pdf/2402.14903">pdf</a>, <a href="https://arxiv.org/format/2402.14903">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Tokenization counts: the impact of tokenization on arithmetic in frontier LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Strouse%2C+D">DJ Strouse</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.14903v1-abstract-short" style="display: inline;"> Tokenization, the division of input text into input tokens, is an often overlooked aspect of the large language model (LLM) pipeline and could be the source of useful or harmful inductive biases. Historically, LLMs have relied on byte pair encoding, without care to specific input domains. With the increased use of LLMs for reasoning, various number-specific tokenization schemes have been adopted,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14903v1-abstract-full').style.display = 'inline'; document.getElementById('2402.14903v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.14903v1-abstract-full" style="display: none;"> Tokenization, the division of input text into input tokens, is an often overlooked aspect of the large language model (LLM) pipeline and could be the source of useful or harmful inductive biases. Historically, LLMs have relied on byte pair encoding, without care to specific input domains. With the increased use of LLMs for reasoning, various number-specific tokenization schemes have been adopted, with popular models like LLaMa and PaLM opting for single-digit tokenization while GPT-3.5 and GPT-4 have separate tokens for each 1-, 2-, and 3-digit numbers. In this work, we study the effect this choice has on numerical reasoning through the use of arithmetic tasks. We consider left-to-right and right-to-left tokenization for GPT-3.5 and -4, finding that right-to-left tokenization (enforced by comma separating numbers at inference time) leads to largely improved performance. Furthermore, we find that model errors when using standard left-to-right tokenization follow stereotyped error patterns, suggesting that model computations are systematic rather than approximate. We show that the model is able to convert between tokenizations easily, thus allowing chain-of-thought-inspired approaches to recover performance on left-to-right tokenized inputs. We also find the gap between tokenization directions decreases when models are scaled, possibly indicating that larger models are better able to override this tokenization-dependent inductive bias. In summary, our work performs the first study of how number tokenization choices lead to differences in model performance on arithmetic tasks, accompanied by a thorough analysis of error patterns. We hope this work inspires practitioners to more carefully ablate number tokenization-related choices when working towards general models of numerical reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14903v1-abstract-full').style.display = 'none'; document.getElementById('2402.14903v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 18 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.14558">arXiv:2402.14558</a> <span> [<a href="https://arxiv.org/pdf/2402.14558">pdf</a>, <a href="https://arxiv.org/format/2402.14558">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLMs with Industrial Lens: Deciphering the Challenges and Prospects -- A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Urlana%2C+A">Ashok Urlana</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+C+V">Charaka Vinayak Kumar</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ajeet Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Garlapati%2C+B+M">Bala Mallikarjunarao Garlapati</a>, <a href="/search/cs?searchtype=author&query=Chalamala%2C+S+R">Srinivasa Rao Chalamala</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+R">Rahul Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.14558v1-abstract-short" style="display: inline;"> Large language models (LLMs) have become the secret ingredient driving numerous industrial applications, showcasing their remarkable versatility across a diverse spectrum of tasks. From natural language processing and sentiment analysis to content generation and personalized recommendations, their unparalleled adaptability has facilitated widespread adoption across industries. This transformative… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14558v1-abstract-full').style.display = 'inline'; document.getElementById('2402.14558v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.14558v1-abstract-full" style="display: none;"> Large language models (LLMs) have become the secret ingredient driving numerous industrial applications, showcasing their remarkable versatility across a diverse spectrum of tasks. From natural language processing and sentiment analysis to content generation and personalized recommendations, their unparalleled adaptability has facilitated widespread adoption across industries. This transformative shift driven by LLMs underscores the need to explore the underlying associated challenges and avenues for enhancement in their utilization. In this paper, our objective is to unravel and evaluate the obstacles and opportunities inherent in leveraging LLMs within an industrial context. To this end, we conduct a survey involving a group of industry practitioners, develop four research questions derived from the insights gathered, and examine 68 industry papers to address these questions and derive meaningful conclusions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14558v1-abstract-full').style.display = 'none'; document.getElementById('2402.14558v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09654">arXiv:2402.09654</a> <span> [<a href="https://arxiv.org/pdf/2402.09654">pdf</a>, <a href="https://arxiv.org/format/2402.09654">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> GPT-4's assessment of its performance in a USMLE-based case study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dhakal%2C+U">Uttam Dhakal</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aniket Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Devkota%2C+S">Suman Devkota</a>, <a href="/search/cs?searchtype=author&query=Sapkota%2C+Y">Yogesh Sapkota</a>, <a href="/search/cs?searchtype=author&query=Lamichhane%2C+B">Bishal Lamichhane</a>, <a href="/search/cs?searchtype=author&query=Paudyal%2C+S">Suprinsa Paudyal</a>, <a href="/search/cs?searchtype=author&query=Dhakal%2C+C">Chandra Dhakal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09654v2-abstract-short" style="display: inline;"> This study investigates GPT-4's assessment of its performance in healthcare applications. A simple prompting technique was used to prompt the LLM with questions taken from the United States Medical Licensing Examination (USMLE) questionnaire and it was tasked to evaluate its confidence score before posing the question and after asking the question. The questionnaire was categorized into two groups… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09654v2-abstract-full').style.display = 'inline'; document.getElementById('2402.09654v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09654v2-abstract-full" style="display: none;"> This study investigates GPT-4's assessment of its performance in healthcare applications. A simple prompting technique was used to prompt the LLM with questions taken from the United States Medical Licensing Examination (USMLE) questionnaire and it was tasked to evaluate its confidence score before posing the question and after asking the question. The questionnaire was categorized into two groups-questions with feedback (WF) and questions with no feedback(NF) post-question. The model was asked to provide absolute and relative confidence scores before and after each question. The experimental findings were analyzed using statistical tools to study the variability of confidence in WF and NF groups. Additionally, a sequential analysis was conducted to observe the performance variation for the WF and NF groups. Results indicate that feedback influences relative confidence but doesn't consistently increase or decrease it. Understanding the performance of LLM is paramount in exploring its utility in sensitive areas like healthcare. This study contributes to the ongoing discourse on the reliability of AI, particularly of LLMs like GPT-4, within healthcare, offering insights into how feedback mechanisms might be optimized to enhance AI-assisted medical education and decision support. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09654v2-abstract-full').style.display = 'none'; document.getElementById('2402.09654v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.07927">arXiv:2402.07927</a> <span> [<a href="https://arxiv.org/pdf/2402.07927">pdf</a>, <a href="https://arxiv.org/format/2402.07927">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> A Systematic Survey of Prompt Engineering in Large Language Models: Techniques and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sahoo%2C+P">Pranab Sahoo</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ayush Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Saha%2C+S">Sriparna Saha</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+V">Vinija Jain</a>, <a href="/search/cs?searchtype=author&query=Mondal%2C+S">Samrat Mondal</a>, <a href="/search/cs?searchtype=author&query=Chadha%2C+A">Aman Chadha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.07927v1-abstract-short" style="display: inline;"> Prompt engineering has emerged as an indispensable technique for extending the capabilities of large language models (LLMs) and vision-language models (VLMs). This approach leverages task-specific instructions, known as prompts, to enhance model efficacy without modifying the core model parameters. Rather than updating the model parameters, prompts allow seamless integration of pre-trained models… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.07927v1-abstract-full').style.display = 'inline'; document.getElementById('2402.07927v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.07927v1-abstract-full" style="display: none;"> Prompt engineering has emerged as an indispensable technique for extending the capabilities of large language models (LLMs) and vision-language models (VLMs). This approach leverages task-specific instructions, known as prompts, to enhance model efficacy without modifying the core model parameters. Rather than updating the model parameters, prompts allow seamless integration of pre-trained models into downstream tasks by eliciting desired model behaviors solely based on the given prompt. Prompts can be natural language instructions that provide context to guide the model or learned vector representations that activate relevant knowledge. This burgeoning field has enabled success across various applications, from question-answering to commonsense reasoning. However, there remains a lack of systematic organization and understanding of the diverse prompt engineering methods and techniques. This survey paper addresses the gap by providing a structured overview of recent advancements in prompt engineering, categorized by application area. For each prompting approach, we provide a summary detailing the prompting methodology, its applications, the models involved, and the datasets utilized. We also delve into the strengths and limitations of each approach and include a taxonomy diagram and table summarizing datasets, models, and critical points of each prompting technique. This systematic analysis enables a better understanding of this rapidly developing field and facilitates future research by illuminating open challenges and opportunities for prompt engineering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.07927v1-abstract-full').style.display = 'none'; document.getElementById('2402.07927v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08943">arXiv:2401.08943</a> <span> [<a href="https://arxiv.org/pdf/2401.08943">pdf</a>, <a href="https://arxiv.org/format/2401.08943">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fluid Dynamic DNNs for Reliable and Adaptive Distributed Inference on Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xun%2C+L">Lei Xun</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+M">Mingyu Hu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hengrui Zhao</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Amit Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Hare%2C+J">Jonathon Hare</a>, <a href="/search/cs?searchtype=author&query=Merrett%2C+G+V">Geoff V. Merrett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08943v1-abstract-short" style="display: inline;"> Distributed inference is a popular approach for efficient DNN inference at the edge. However, traditional Static and Dynamic DNNs are not distribution-friendly, causing system reliability and adaptability issues. In this paper, we introduce Fluid Dynamic DNNs (Fluid DyDNNs), tailored for distributed inference. Distinct from Static and Dynamic DNNs, Fluid DyDNNs utilize a novel nested incremental t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08943v1-abstract-full').style.display = 'inline'; document.getElementById('2401.08943v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08943v1-abstract-full" style="display: none;"> Distributed inference is a popular approach for efficient DNN inference at the edge. However, traditional Static and Dynamic DNNs are not distribution-friendly, causing system reliability and adaptability issues. In this paper, we introduce Fluid Dynamic DNNs (Fluid DyDNNs), tailored for distributed inference. Distinct from Static and Dynamic DNNs, Fluid DyDNNs utilize a novel nested incremental training algorithm to enable independent and combined operation of its sub-networks, enhancing system reliability and adaptability. Evaluation on embedded Arm CPUs with a DNN model and the MNIST dataset, shows that in scenarios of single device failure, Fluid DyDNNs ensure continued inference, whereas Static and Dynamic DNNs fail. When devices are fully operational, Fluid DyDNNs can operate in either a High-Accuracy mode and achieve comparable accuracy with Static DNNs, or in a High-Throughput mode and achieve 2.5x and 2x throughput compared with Static and Dynamic DNNs, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08943v1-abstract-full').style.display = 'none'; document.getElementById('2401.08943v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at Design, Automation & Test in Europe Conference (DATE) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.12338">arXiv:2312.12338</a> <span> [<a href="https://arxiv.org/pdf/2312.12338">pdf</a>, <a href="https://arxiv.org/format/2312.12338">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Smart Connected Farms and Networked Farmers to Tackle Climate Challenges Impacting Agricultural Production </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Balabaygloo%2C+B+J">Behzad J. Balabaygloo</a>, <a href="/search/cs?searchtype=author&query=Bekee%2C+B">Barituka Bekee</a>, <a href="/search/cs?searchtype=author&query=Blair%2C+S+W">Samuel W. Blair</a>, <a href="/search/cs?searchtype=author&query=Fey%2C+S">Suzanne Fey</a>, <a href="/search/cs?searchtype=author&query=Fotouhi%2C+F">Fateme Fotouhi</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Ashish Gupta</a>, <a href="/search/cs?searchtype=author&query=Menke%2C+K">Kevin Menke</a>, <a href="/search/cs?searchtype=author&query=Vangala%2C+A">Anusha Vangala</a>, <a href="/search/cs?searchtype=author&query=Palomares%2C+J+C+M">Jorge C. M. Palomares</a>, <a href="/search/cs?searchtype=author&query=Prestholt%2C+A">Aaron Prestholt</a>, <a href="/search/cs?searchtype=author&query=Tanwar%2C+V+K">Vishesh K. Tanwar</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+X">Xu Tao</a>, <a href="/search/cs?searchtype=author&query=Carroll%2C+M+E">Matthew E. Carroll</a>, <a href="/search/cs?searchtype=author&query=Das%2C+S">Sajal Das</a>, <a href="/search/cs?searchtype=author&query=Depaula%2C+G">Gil Depaula</a>, <a href="/search/cs?searchtype=author&query=Kyveryga%2C+P">Peter Kyveryga</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Soumik Sarkar</a>, <a href="/search/cs?searchtype=author&query=Segovia%2C+M">Michelle Segovia</a>, <a href="/search/cs?searchtype=author&query=Sylvestri%2C+S">Simone Sylvestri</a>, <a href="/search/cs?searchtype=author&query=Valdivia%2C+C">Corinne Valdivia</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Asheesh K. Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.12338v1-abstract-short" style="display: inline;"> To meet the grand challenges of agricultural production including climate change impacts on crop production, a tight integration of social science, technology and agriculture experts including farmers are needed. There are rapid advances in information and communication technology, precision agriculture and data analytics, which are creating a fertile field for the creation of smart connected farm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12338v1-abstract-full').style.display = 'inline'; document.getElementById('2312.12338v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.12338v1-abstract-full" style="display: none;"> To meet the grand challenges of agricultural production including climate change impacts on crop production, a tight integration of social science, technology and agriculture experts including farmers are needed. There are rapid advances in information and communication technology, precision agriculture and data analytics, which are creating a fertile field for the creation of smart connected farms (SCF) and networked farmers. A network and coordinated farmer network provides unique advantages to farmers to enhance farm production and profitability, while tackling adverse climate events. The aim of this article is to provide a comprehensive overview of the state of the art in SCF including the advances in engineering, computer sciences, data sciences, social sciences and economics including data privacy, sharing and technology adoption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12338v1-abstract-full').style.display = 'none'; document.getElementById('2312.12338v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.07759">arXiv:2312.07759</a> <span> [<a href="https://arxiv.org/pdf/2312.07759">pdf</a>, <a href="https://arxiv.org/ps/2312.07759">ps</a>, <a href="https://arxiv.org/format/2312.07759">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> IDKM: Memory Efficient Neural Network Quantization via Implicit, Differentiable k-Means </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jaffe%2C+S">Sean Jaffe</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ambuj K. Singh</a>, <a href="/search/cs?searchtype=author&query=Bullo%2C+F">Francesco Bullo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.07759v2-abstract-short" style="display: inline;"> Compressing large neural networks with minimal performance loss is crucial to enabling their deployment on edge devices. (Cho et al., 2022) proposed a weight quantization method that uses an attention-based clustering algorithm called differentiable $k$-means (DKM). Despite achieving state-of-the-art results, DKM's performance is constrained by its heavy memory dependency. We propose an implicit,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07759v2-abstract-full').style.display = 'inline'; document.getElementById('2312.07759v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.07759v2-abstract-full" style="display: none;"> Compressing large neural networks with minimal performance loss is crucial to enabling their deployment on edge devices. (Cho et al., 2022) proposed a weight quantization method that uses an attention-based clustering algorithm called differentiable $k$-means (DKM). Despite achieving state-of-the-art results, DKM's performance is constrained by its heavy memory dependency. We propose an implicit, differentiable $k$-means algorithm (IDKM), which eliminates the major memory restriction of DKM. Let $t$ be the number of $k$-means iterations, $m$ be the number of weight-vectors, and $b$ be the number of bits per cluster address. IDKM reduces the overall memory complexity of a single $k$-means layer from $\mathcal{O}(t \cdot m \cdot 2^b)$ to $\mathcal{O}( m \cdot 2^b)$. We also introduce a variant, IDKM with Jacobian-Free-Backpropagation (IDKM-JFB), for which the time complexity of the gradient calculation is independent of $t$ as well. We provide a proof of concept of our methods by showing that, under the same settings, IDKM achieves comparable performance to DKM with less compute time and less memory. We also use IDKM and IDKM-JFB to quantize a large neural network, Resnet18, on hardware where DKM cannot train at all. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.07759v2-abstract-full').style.display = 'none'; document.getElementById('2312.07759v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.02418">arXiv:2312.02418</a> <span> [<a href="https://arxiv.org/pdf/2312.02418">pdf</a>, <a href="https://arxiv.org/format/2312.02418">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Decoding Data Quality via Synthetic Corruptions: Embedding-guided Pruning of Code Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yu Yang</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Elhoushi%2C+M">Mostafa Elhoushi</a>, <a href="/search/cs?searchtype=author&query=Mahmoud%2C+A">Anas Mahmoud</a>, <a href="/search/cs?searchtype=author&query=Tirumala%2C+K">Kushal Tirumala</a>, <a href="/search/cs?searchtype=author&query=Gloeckle%2C+F">Fabian Gloeckle</a>, <a href="/search/cs?searchtype=author&query=Rozi%C3%A8re%2C+B">Baptiste Rozi猫re</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Carole-Jean Wu</a>, <a href="/search/cs?searchtype=author&query=Morcos%2C+A+S">Ari S. Morcos</a>, <a href="/search/cs?searchtype=author&query=Ardalani%2C+N">Newsha Ardalani</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.02418v1-abstract-short" style="display: inline;"> Code datasets, often collected from diverse and uncontrolled sources such as GitHub, potentially suffer from quality issues, thereby affecting the performance and training efficiency of Large Language Models (LLMs) optimized for code generation. Previous studies demonstrated the benefit of using embedding spaces for data pruning, but they mainly focused on duplicate removal or increasing variety,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.02418v1-abstract-full').style.display = 'inline'; document.getElementById('2312.02418v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.02418v1-abstract-full" style="display: none;"> Code datasets, often collected from diverse and uncontrolled sources such as GitHub, potentially suffer from quality issues, thereby affecting the performance and training efficiency of Large Language Models (LLMs) optimized for code generation. Previous studies demonstrated the benefit of using embedding spaces for data pruning, but they mainly focused on duplicate removal or increasing variety, and in other modalities, such as images. Our work focuses on using embeddings to identify and remove "low-quality" code data. First, we explore features of "low-quality" code in embedding space, through the use of synthetic corruptions. Armed with this knowledge, we devise novel pruning metrics that operate in embedding space to identify and remove low-quality entries in the Stack dataset. We demonstrate the benefits of this synthetic corruption informed pruning (SCIP) approach on the well-established HumanEval and MBPP benchmarks, outperforming existing embedding-based methods. Importantly, we achieve up to a 3% performance improvement over no pruning, thereby showing the promise of insights from synthetic corruptions for data pruning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.02418v1-abstract-full').style.display = 'none'; document.getElementById('2312.02418v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 4 figures, Oral Presentation at 3rd Workshop on Efficient Natural Language and Speech Processing (ENLSP-III), NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.08360">arXiv:2311.08360</a> <span> [<a href="https://arxiv.org/pdf/2311.08360">pdf</a>, <a href="https://arxiv.org/format/2311.08360">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> The Transient Nature of Emergent In-Context Learning in Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+S+C+Y">Stephanie C. Y. Chan</a>, <a href="/search/cs?searchtype=author&query=Moskovitz%2C+T">Ted Moskovitz</a>, <a href="/search/cs?searchtype=author&query=Grant%2C+E">Erin Grant</a>, <a href="/search/cs?searchtype=author&query=Saxe%2C+A+M">Andrew M. Saxe</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+F">Felix Hill</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.08360v3-abstract-short" style="display: inline;"> Transformer neural networks can exhibit a surprising capacity for in-context learning (ICL) despite not being explicitly trained for it. Prior work has provided a deeper understanding of how ICL emerges in transformers, e.g. through the lens of mechanistic interpretability, Bayesian inference, or by examining the distributional properties of training data. However, in each of these cases, ICL is t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08360v3-abstract-full').style.display = 'inline'; document.getElementById('2311.08360v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.08360v3-abstract-full" style="display: none;"> Transformer neural networks can exhibit a surprising capacity for in-context learning (ICL) despite not being explicitly trained for it. Prior work has provided a deeper understanding of how ICL emerges in transformers, e.g. through the lens of mechanistic interpretability, Bayesian inference, or by examining the distributional properties of training data. However, in each of these cases, ICL is treated largely as a persistent phenomenon; namely, once ICL emerges, it is assumed to persist asymptotically. Here, we show that the emergence of ICL during transformer training is, in fact, often transient. We train transformers on synthetic data designed so that both ICL and in-weights learning (IWL) strategies can lead to correct predictions. We find that ICL first emerges, then disappears and gives way to IWL, all while the training loss decreases, indicating an asymptotic preference for IWL. The transient nature of ICL is observed in transformers across a range of model sizes and datasets, raising the question of how much to "overtrain" transformers when seeking compact, cheaper-to-run models. We find that L2 regularization may offer a path to more persistent ICL that removes the need for early stopping based on ICL-style validation tasks. Finally, we present initial evidence that ICL transience may be caused by competition between ICL and IWL circuits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08360v3-abstract-full').style.display = 'none'; document.getElementById('2311.08360v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 16 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.17808">arXiv:2310.17808</a> <span> [<a href="https://arxiv.org/pdf/2310.17808">pdf</a>, <a href="https://arxiv.org/format/2310.17808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> A Novel Fast Path Planning Approach for Mobile Devices using Hybrid Quantum Ant Colony Optimization Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sarkar%2C+M">Mayukh Sarkar</a>, <a href="/search/cs?searchtype=author&query=Pradhan%2C+J">Jitesh Pradhan</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Anil Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Nenavath%2C+H">Hathiram Nenavath</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.17808v1-abstract-short" style="display: inline;"> With IoT systems' increasing scale and complexity, maintenance of a large number of nodes using stationary devices is becoming increasingly difficult. Hence, mobile devices are being employed that can traverse through a set of target locations and provide the necessary services. In order to reduce energy consumption and time requirements, the devices are required to traverse following a Hamiltonia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17808v1-abstract-full').style.display = 'inline'; document.getElementById('2310.17808v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.17808v1-abstract-full" style="display: none;"> With IoT systems' increasing scale and complexity, maintenance of a large number of nodes using stationary devices is becoming increasingly difficult. Hence, mobile devices are being employed that can traverse through a set of target locations and provide the necessary services. In order to reduce energy consumption and time requirements, the devices are required to traverse following a Hamiltonian path. This problem can be formulated as a Travelling Salesman Problem (TSP), an NP-hard problem. Moreover, in emergency services, the devices must traverse in real-time, demanding speedy path planning from the TSP instance. Among the well-known optimization techniques for solving the TSP problem, Ant Colony Optimization has a good stronghold in providing good approximate solutions. Moreover, ACO not only provides near-optimal solutions for TSP instances but can also output optimal or near-optimal solutions for many other demanding hard optimization problems. However, to have a fast solution, the next node selection, which needs to consider all the neighbors for each selection, becomes a bottleneck in the path formation step. Moreover, classical computers are constrained to generate only pseudorandom numbers. Both these problems can be solved using quantum computing techniques, i.e., the next node can be selected with proper randomization, respecting the provided set of probabilities in just a single execution and single measurement of a quantum circuit. Simulation results of the proposed Hybrid Quantum Ant Colony Optimization algorithm on several TSP instances have shown promising results, thus expecting the proposed work to be important in implementing real-time path planning in quantum-enabled mobile devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17808v1-abstract-full').style.display = 'none'; document.getElementById('2310.17808v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14766">arXiv:2310.14766</a> <span> [<a href="https://arxiv.org/pdf/2310.14766">pdf</a>, <a href="https://arxiv.org/format/2310.14766">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> End-to-End Learning of Behavioural Inputs for Autonomous Driving in Dense Traffic </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shrestha%2C+J">Jatan Shrestha</a>, <a href="/search/cs?searchtype=author&query=Idoko%2C+S">Simon Idoko</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+B">Basant Sharma</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14766v1-abstract-short" style="display: inline;"> Trajectory sampling in the Frenet(road-aligned) frame, is one of the most popular methods for motion planning of autonomous vehicles. It operates by sampling a set of behavioural inputs, such as lane offset and forward speed, before solving a trajectory optimization problem conditioned on the sampled inputs. The sampling is handcrafted based on simple heuristics, does not adapt to driving scenario… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14766v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14766v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14766v1-abstract-full" style="display: none;"> Trajectory sampling in the Frenet(road-aligned) frame, is one of the most popular methods for motion planning of autonomous vehicles. It operates by sampling a set of behavioural inputs, such as lane offset and forward speed, before solving a trajectory optimization problem conditioned on the sampled inputs. The sampling is handcrafted based on simple heuristics, does not adapt to driving scenarios, and is oblivious to the capabilities of downstream trajectory planners. In this paper, we propose an end-to-end learning of behavioural input distribution from expert demonstrations or in a self-supervised manner. Our core novelty lies in embedding a custom differentiable trajectory optimizer as a layer in neural networks, allowing us to update behavioural inputs by considering the optimizer's feedback. Moreover, our end-to-end approach also ensures that the learned behavioural inputs aid the convergence of the optimizer. We improve the state-of-the-art in the following aspects. First, we show that learned behavioural inputs substantially decrease collision rate while improving driving efficiency over handcrafted approaches. Second, our approach outperforms model predictive control methods based on sampling-based optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14766v1-abstract-full').style.display = 'none'; document.getElementById('2310.14766v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to IROS 2023. arXiv admin note: text overlap with arXiv:2212.02224</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09195">arXiv:2310.09195</a> <span> [<a href="https://arxiv.org/pdf/2310.09195">pdf</a>, <a href="https://arxiv.org/format/2310.09195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> AMSwarmX: Safe Swarm Coordination in CompleX Environments via Implicit Non-Convex Decomposition of the Obstacle-Free Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Adajania%2C+V+K">Vivek K. Adajania</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Siqi Zhou</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Schoellig%2C+A+P">Angela P. Schoellig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09195v1-abstract-short" style="display: inline;"> Quadrotor motion planning in complex environments leverage the concept of safe flight corridor (SFC) to facilitate static obstacle avoidance. Typically, SFCs are constructed through convex decomposition of the environment's free space into cuboids, convex polyhedra, or spheres. However, when dealing with a quadrotor swarm, such SFCs can be overly conservative, substantially limiting the available… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09195v1-abstract-full').style.display = 'inline'; document.getElementById('2310.09195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09195v1-abstract-full" style="display: none;"> Quadrotor motion planning in complex environments leverage the concept of safe flight corridor (SFC) to facilitate static obstacle avoidance. Typically, SFCs are constructed through convex decomposition of the environment's free space into cuboids, convex polyhedra, or spheres. However, when dealing with a quadrotor swarm, such SFCs can be overly conservative, substantially limiting the available free space for quadrotors to coordinate. This paper presents an Alternating Minimization-based approach that does not require building a conservative free-space approximation. Instead, both static and dynamic collision constraints are treated in a unified manner. Dynamic collisions are handled based on shared position trajectories of the quadrotors. Static obstacle avoidance is coupled with distance queries from the Octomap, providing an implicit non-convex decomposition of free space. As a result, our approach is scalable to arbitrary complex environments. Through extensive comparisons in simulation, we demonstrate a $60\%$ improvement in success rate, an average $1.8\times$ reduction in mission completion time, and an average $23\times$ reduction in per-agent computation time compared to SFC-based approaches. We also experimentally validated our approach using a Crazyflie quadrotor swarm of up to 12 quadrotors in obstacle-rich environments. The code, supplementary materials, and videos are released for reference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09195v1-abstract-full').style.display = 'none'; document.getElementById('2310.09195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICRA 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.08270">arXiv:2310.08270</a> <span> [<a href="https://arxiv.org/pdf/2310.08270">pdf</a>, <a href="https://arxiv.org/format/2310.08270">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Hilbert Space Embedding-based Trajectory Optimization for Multi-Modal Uncertain Obstacle Trajectory Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+B">Basant Sharma</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Aditya Sharma</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+K+M">K. Madhava Krishna</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun Kumar Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.08270v1-abstract-short" style="display: inline;"> Safe autonomous driving critically depends on how well the ego-vehicle can predict the trajectories of neighboring vehicles. To this end, several trajectory prediction algorithms have been presented in the existing literature. Many of these approaches output a multi-modal distribution of obstacle trajectories instead of a single deterministic prediction to account for the underlying uncertainty. H… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08270v1-abstract-full').style.display = 'inline'; document.getElementById('2310.08270v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.08270v1-abstract-full" style="display: none;"> Safe autonomous driving critically depends on how well the ego-vehicle can predict the trajectories of neighboring vehicles. To this end, several trajectory prediction algorithms have been presented in the existing literature. Many of these approaches output a multi-modal distribution of obstacle trajectories instead of a single deterministic prediction to account for the underlying uncertainty. However, existing planners cannot handle the multi-modality based on just sample-level information of the predictions. With this motivation, this paper proposes a trajectory optimizer that can leverage the distributional aspects of the prediction in a computationally tractable and sample-efficient manner. Our optimizer can work with arbitrarily complex distributions and thus can be used with output distribution represented as a deep neural network. The core of our approach is built on embedding distribution in Reproducing Kernel Hilbert Space (RKHS), which we leverage in two ways. First, we propose an RKHS embedding approach to select probable samples from the obstacle trajectory distribution. Second, we rephrase chance-constrained optimization as distribution matching in RKHS and propose a novel sampling-based optimizer for its solution. We validate our approach with hand-crafted and neural network-based predictors trained on real-world datasets and show improvement over the existing stochastic optimization approaches in safety metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08270v1-abstract-full').style.display = 'none'; document.getElementById('2310.08270v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04373">arXiv:2310.04373</a> <span> [<a href="https://arxiv.org/pdf/2310.04373">pdf</a>, <a href="https://arxiv.org/format/2310.04373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Confronting Reward Model Overoptimization with Constrained RLHF </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Moskovitz%2C+T">Ted Moskovitz</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aaditya K. Singh</a>, <a href="/search/cs?searchtype=author&query=Strouse%2C+D">DJ Strouse</a>, <a href="/search/cs?searchtype=author&query=Sandholm%2C+T">Tuomas Sandholm</a>, <a href="/search/cs?searchtype=author&query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&query=Dragan%2C+A+D">Anca D. Dragan</a>, <a href="/search/cs?searchtype=author&query=McAleer%2C+S">Stephen McAleer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04373v2-abstract-short" style="display: inline;"> Large language models are typically aligned with human preferences by optimizing $\textit{reward models}$ (RMs) fitted to human feedback. However, human preferences are multi-faceted, and it is increasingly common to derive reward from a composition of simpler reward models which each capture a different aspect of language quality. This itself presents a challenge, as it is difficult to appropriat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04373v2-abstract-full').style.display = 'inline'; document.getElementById('2310.04373v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04373v2-abstract-full" style="display: none;"> Large language models are typically aligned with human preferences by optimizing $\textit{reward models}$ (RMs) fitted to human feedback. However, human preferences are multi-faceted, and it is increasingly common to derive reward from a composition of simpler reward models which each capture a different aspect of language quality. This itself presents a challenge, as it is difficult to appropriately weight these component RMs when combining them. Compounding this difficulty, because any RM is only a proxy for human evaluation, this process is vulnerable to $\textit{overoptimization}$, wherein past a certain point, accumulating higher reward is associated with worse human ratings. In this paper, we perform, to our knowledge, the first study on overoptimization in composite RMs, showing that correlation between component RMs has a significant effect on the locations of these points. We then introduce an approach to solve this issue using constrained reinforcement learning as a means of preventing the agent from exceeding each RM's threshold of usefulness. Our method addresses the problem of weighting component RMs by learning dynamic weights, naturally expressed by Lagrange multipliers. As a result, each RM stays within the range at which it is an effective proxy, improving evaluation performance. Finally, we introduce an adaptive method using gradient-free optimization to identify and optimize towards these points during a single run. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04373v2-abstract-full').style.display = 'none'; document.getElementById('2310.04373v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.02251">arXiv:2310.02251</a> <span> [<a href="https://arxiv.org/pdf/2310.02251">pdf</a>, <a href="https://arxiv.org/format/2310.02251">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Talk2BEV: Language-enhanced Bird's-eye View Maps for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Choudhary%2C+T">Tushar Choudhary</a>, <a href="/search/cs?searchtype=author&query=Dewangan%2C+V">Vikrant Dewangan</a>, <a href="/search/cs?searchtype=author&query=Chandhok%2C+S">Shivam Chandhok</a>, <a href="/search/cs?searchtype=author&query=Priyadarshan%2C+S">Shubham Priyadarshan</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+A">Anushka Jain</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Arun K. Singh</a>, <a href="/search/cs?searchtype=author&query=Srivastava%2C+S">Siddharth Srivastava</a>, <a href="/search/cs?searchtype=author&query=Jatavallabhula%2C+K+M">Krishna Murthy Jatavallabhula</a>, <a href="/search/cs?searchtype=author&query=Krishna%2C+K+M">K. Madhava Krishna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.02251v2-abstract-short" style="display: inline;"> Talk2BEV is a large vision-language model (LVLM) interface for bird's-eye view (BEV) maps in autonomous driving contexts. While existing perception systems for autonomous driving scenarios have largely focused on a pre-defined (closed) set of object categories and driving scenarios, Talk2BEV blends recent advances in general-purpose language and vision models with BEV-structured map representation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02251v2-abstract-full').style.display = 'inline'; document.getElementById('2310.02251v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.02251v2-abstract-full" style="display: none;"> Talk2BEV is a large vision-language model (LVLM) interface for bird's-eye view (BEV) maps in autonomous driving contexts. While existing perception systems for autonomous driving scenarios have largely focused on a pre-defined (closed) set of object categories and driving scenarios, Talk2BEV blends recent advances in general-purpose language and vision models with BEV-structured map representations, eliminating the need for task-specific models. This enables a single system to cater to a variety of autonomous driving tasks encompassing visual and spatial reasoning, predicting the intents of traffic actors, and decision-making based on visual cues. We extensively evaluate Talk2BEV on a large number of scene understanding tasks that rely on both the ability to interpret free-form natural language queries, and in grounding these queries to the visual context embedded into the language-enhanced BEV map. To enable further research in LVLMs for autonomous driving scenarios, we develop and release Talk2BEV-Bench, a benchmark encompassing 1000 human-annotated BEV scenarios, with more than 20,000 questions and ground-truth responses from the NuScenes dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.02251v2-abstract-full').style.display = 'none'; document.getElementById('2310.02251v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page at https://llmbev.github.io/talk2bev/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.16145">arXiv:2309.16145</a> <span> [<a href="https://arxiv.org/pdf/2309.16145">pdf</a>, <a href="https://arxiv.org/format/2309.16145">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> The Confidence-Competence Gap in Large Language Models: A Cognitive Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Aniket Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Devkota%2C+S">Suman Devkota</a>, <a href="/search/cs?searchtype=author&query=Lamichhane%2C+B">Bishal Lamichhane</a>, <a href="/search/cs?searchtype=author&query=Dhakal%2C+U">Uttam Dhakal</a>, <a href="/search/cs?searchtype=author&query=Dhakal%2C+C">Chandra Dhakal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.16145v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have acquired ubiquitous attention for their performances across diverse domains. Our study here searches through LLMs' cognitive abilities and confidence dynamics. We dive deep into understanding the alignment between their self-assessed confidence and actual performance. We exploit these models with diverse sets of questionnaires and real-world scenarios and extract… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16145v1-abstract-full').style.display = 'inline'; document.getElementById('2309.16145v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.16145v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have acquired ubiquitous attention for their performances across diverse domains. Our study here searches through LLMs' cognitive abilities and confidence dynamics. We dive deep into understanding the alignment between their self-assessed confidence and actual performance. We exploit these models with diverse sets of questionnaires and real-world scenarios and extract how LLMs exhibit confidence in their responses. Our findings reveal intriguing instances where models demonstrate high confidence even when they answer incorrectly. This is reminiscent of the Dunning-Kruger effect observed in human psychology. In contrast, there are cases where models exhibit low confidence with correct answers revealing potential underestimation biases. Our results underscore the need for a deeper understanding of their cognitive processes. By examining the nuances of LLMs' self-assessment mechanism, this investigation provides noteworthy revelations that serve to advance the functionalities and broaden the potential applications of these formidable language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.16145v1-abstract-full').style.display = 'none'; document.getElementById('2309.16145v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 8 Figures, to be published in a journal (Journal TBD), All Authors contributed equally and were Supervised by Chandra Dhakal</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> ACM-class: I.2.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.15881">arXiv:2309.15881</a> <span> [<a href="https://arxiv.org/pdf/2309.15881">pdf</a>, <a href="https://arxiv.org/format/2309.15881">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Cross-Category Learning in Recommendation Systems with Multi-Layer Embedding Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+Z">Zihao Deng</a>, <a href="/search/cs?searchtype=author&query=Ghaemmaghami%2C+B">Benjamin Ghaemmaghami</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A+K">Ashish Kumar Singh</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+B">Benjamin Cho</a>, <a href="/search/cs?searchtype=author&query=Orshansky%2C+L">Leo Orshansky</a>, <a href="/search/cs?searchtype=author&query=Erez%2C+M">Mattan Erez</a>, <a href="/search/cs?searchtype=author&query=Orshansky%2C+M">Michael Orshansky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.15881v1-abstract-short" style="display: inline;"> Modern DNN-based recommendation systems rely on training-derived embeddings of sparse features. Input sparsity makes obtaining high-quality embeddings for rarely-occurring categories harder as their representations are updated infrequently. We demonstrate a training-time technique to produce superior embeddings via effective cross-category learning and theoretically explain its surprising effectiv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15881v1-abstract-full').style.display = 'inline'; document.getElementById('2309.15881v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.15881v1-abstract-full" style="display: none;"> Modern DNN-based recommendation systems rely on training-derived embeddings of sparse features. Input sparsity makes obtaining high-quality embeddings for rarely-occurring categories harder as their representations are updated infrequently. We demonstrate a training-time technique to produce superior embeddings via effective cross-category learning and theoretically explain its surprising effectiveness. The scheme, termed the multi-layer embeddings training (MLET), trains embeddings using factorization of the embedding layer, with an inner dimension higher than the target embedding dimension. For inference efficiency, MLET converts the trained two-layer embedding into a single-layer one thus keeping inference-time model size unchanged. Empirical superiority of MLET is puzzling as its search space is not larger than that of the single-layer embedding. The strong dependence of MLET on the inner dimension is even more surprising. We develop a theory that explains both of these behaviors by showing that MLET creates an adaptive update mechanism modulated by the singular vectors of embeddings. When tested on multiple state-of-the-art recommendation models for click-through rate (CTR) prediction tasks, MLET consistently produces better models, especially for rare items. At constant model quality, MLET allows embedding dimension, and model size, reduction by up to 16x, and 5.8x on average, across the models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15881v1-abstract-full').style.display = 'none'; document.getElementById('2309.15881v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is the preprint of our paper accepted at ACML 2023</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Singh%2C+A+K&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository