Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 474 results for author: <span class="mathjax">Jain, S</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>.
<a href="/search/?searchtype=author&query=Jain%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Jain, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jain%2C+S&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jain, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jain%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10867">arXiv:2411.10867</a> <span> [<a href="https://arxiv.org/pdf/2411.10867">pdf</a>, <a
href="https://arxiv.org/format/2411.10867">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ViBe: A Text-to-Video Benchmark for Evaluating Hallucination in Large Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rawte%2C+V">Vipula Rawte</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sarthak Jain</a>, <a href="/search/cs?searchtype=author&query=Sinha%2C+A">Aarush Sinha</a>, <a href="/search/cs?searchtype=author&query=Kaushik%2C+G">Garv Kaushik</a>, <a href="/search/cs?searchtype=author&query=Bansal%2C+A">Aman Bansal</a>, <a href="/search/cs?searchtype=author&query=Vishwanath%2C+P+R">Prathiksha Rumale Vishwanath</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S+R">Samyak Rajesh Jain</a>, <a href="/search/cs?searchtype=author&query=Reganti%2C+A+N">Aishwarya Naresh Reganti</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+V">Vinija Jain</a>, <a href="/search/cs?searchtype=author&query=Chadha%2C+A">Aman Chadha</a>, <a href="/search/cs?searchtype=author&query=Sheth%2C+A+P">Amit P. Sheth</a>, <a href="/search/cs?searchtype=author&query=Das%2C+A">Amitava Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10867v1-abstract-short" style="display: inline;"> Latest developments in Large Multimodal Models (LMMs) have broadened their capabilities to include video understanding. Specifically, Text-to-video (T2V) models have made significant progress in quality, comprehension, and duration, excelling at creating videos from simple textual prompts. 
Yet, they still frequently produce hallucinated content that clearly signals the video is AI-generated. We in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10867v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10867v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10867v1-abstract-full" style="display: none;"> Latest developments in Large Multimodal Models (LMMs) have broadened their capabilities to include video understanding. Specifically, Text-to-video (T2V) models have made significant progress in quality, comprehension, and duration, excelling at creating videos from simple textual prompts. Yet, they still frequently produce hallucinated content that clearly signals the video is AI-generated. We introduce ViBe: a large-scale Text-to-Video Benchmark of hallucinated videos from T2V models. We identify five major types of hallucination: Vanishing Subject, Numeric Variability, Temporal Dysmorphia, Omission Error, and Physical Incongruity. Using 10 open-source T2V models, we developed the first large-scale dataset of hallucinated videos, comprising 3,782 videos annotated by humans into these five categories. ViBe offers a unique resource for evaluating the reliability of T2V models and provides a foundation for improving hallucination detection and mitigation in video generation. We establish classification as a baseline and present various ensemble classifier configurations, with the TimeSFormer + CNN combination yielding the best performance, achieving 0.345 accuracy and 0.342 F1 score. This benchmark aims to drive the development of robust T2V models that produce videos more accurately aligned with input prompts. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10867v1-abstract-full').style.display = 'none'; document.getElementById('2411.10867v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09893">arXiv:2411.09893</a> <span> [<a href="https://arxiv.org/pdf/2411.09893">pdf</a>, <a href="https://arxiv.org/format/2411.09893">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Memory Proxy Maps for Visual Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Johnson%2C+F">Faith Johnson</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+B+B">Bryan Bo Cao</a>, <a href="/search/cs?searchtype=author&query=Ashok%2C+A">Ashwin Ashok</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shubham Jain</a>, <a href="/search/cs?searchtype=author&query=Dana%2C+K">Kristin Dana</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09893v1-abstract-short" style="display: inline;"> Visual navigation takes inspiration from humans, who navigate in previously unseen environments using vision without detailed environment maps. Inspired by this, we introduce a novel no-RL, no-graph, no-odometry approach to visual navigation using feudal learning to build a three tiered agent. 
Key to our approach is a memory proxy map (MPM), an intermediate representation of the environment learne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09893v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09893v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09893v1-abstract-full" style="display: none;"> Visual navigation takes inspiration from humans, who navigate in previously unseen environments using vision without detailed environment maps. Inspired by this, we introduce a novel no-RL, no-graph, no-odometry approach to visual navigation using feudal learning to build a three tiered agent. Key to our approach is a memory proxy map (MPM), an intermediate representation of the environment learned in a self-supervised manner by the high-level manager agent that serves as a simplified memory, approximating what the agent has seen. We demonstrate that recording observations in this learned latent space is an effective and efficient memory proxy that can remove the need for graphs and odometry in visual navigation tasks. For the mid-level manager agent, we develop a waypoint network (WayNet) that outputs intermediate subgoals, or waypoints, imitating human waypoint selection during local navigation. For the low-level worker agent, we learn a classifier over a discrete action space that avoids local obstacles and moves the agent towards the WayNet waypoint. The resulting feudal navigation network offers a novel approach with no RL, no graph, no odometry, and no metric map; all while achieving SOTA results on the image goal navigation task. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09893v1-abstract-full').style.display = 'none'; document.getElementById('2411.09893v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2402.12498</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08027">arXiv:2411.08027</a> <span> [<a href="https://arxiv.org/pdf/2411.08027">pdf</a>, <a href="https://arxiv.org/format/2411.08027">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> LLMPhy: Complex Physical Reasoning Using Large Language Models and World Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cherian%2C+A">Anoop Cherian</a>, <a href="/search/cs?searchtype=author&query=Corcodel%2C+R">Radu Corcodel</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Siddarth Jain</a>, <a href="/search/cs?searchtype=author&query=Romeres%2C+D">Diego Romeres</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08027v1-abstract-short" style="display: inline;"> Physical reasoning is an important skill needed for robotic agents when operating in the real world. However, solving such reasoning problems often involves hypothesizing and reflecting over complex multi-body interactions under the effect of a multitude of physical forces and thus learning all such interactions poses a significant hurdle for state-of-the-art machine learning frameworks, including… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08027v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08027v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08027v1-abstract-full" style="display: none;"> Physical reasoning is an important skill needed for robotic agents when operating in the real world. However, solving such reasoning problems often involves hypothesizing and reflecting over complex multi-body interactions under the effect of a multitude of physical forces and thus learning all such interactions poses a significant hurdle for state-of-the-art machine learning frameworks, including large language models (LLMs). To study this problem, we propose a new physical reasoning task and a dataset, dubbed TraySim. Our task involves predicting the dynamics of several objects on a tray that is given an external impact -- the domino effect of the ensued object interactions and their dynamics thus offering a challenging yet controlled setup, with the goal of reasoning being to infer the stability of the objects after the impact. 
To solve this complex physical reasoning task, we present LLMPhy, a zero-shot black-box optimization framework that leverages the physics knowledge and program synthesis abilities of LLMs, and synergizes these abilities with the world models built into modern physics engines. Specifically, LLMPhy uses an LLM to generate code to iteratively estimate the physical hyperparameters of the system (friction, damping, layout, etc.) via an implicit analysis-by-synthesis approach using a (non-differentiable) simulator in the loop and uses the inferred parameters to imagine the dynamics of the scene towards solving the reasoning task. To show the effectiveness of LLMPhy, we present experiments on our TraySim dataset to predict the steady-state poses of the objects. Our results show that the combination of the LLM and the physics engine leads to state-of-the-art zero-shot physical reasoning performance, while demonstrating superior convergence against standard black-box optimization methods and better estimation of the physical parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08027v1-abstract-full').style.display = 'none'; document.getElementById('2411.08027v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07389">arXiv:2411.07389</a> <span> [<a href="https://arxiv.org/pdf/2411.07389">pdf</a>, <a href="https://arxiv.org/ps/2411.07389">ps</a>, <a href="https://arxiv.org/format/2411.07389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> An Improved Algorithm for Sparse Instances of SAT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sanjay Jain</a>, <a href="/search/cs?searchtype=author&query=Neoh%2C+T+Y">Tzeh Yuan Neoh</a>, <a href="/search/cs?searchtype=author&query=Stephan%2C+F">Frank Stephan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07389v1-abstract-short" style="display: inline;"> We show that the CNF satisfiability problem (SAT) can be solved in time $O^*(1.1199^{(d-2)n})$, where $d$ is either the maximum number of occurrences of any variable or the average number of occurrences of all variables if no variable occurs only once. 
This improves upon the known upper bound of $O^*(1.1279^{(d-2)n})$ by Wahlstr$\ddot{\text{o}}$m (SAT 2005) and $O^*(1.1238^{(d-2)n})$ by Peng and X… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07389v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07389v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07389v1-abstract-full" style="display: none;"> We show that the CNF satisfiability problem (SAT) can be solved in time $O^*(1.1199^{(d-2)n})$, where $d$ is either the maximum number of occurrences of any variable or the average number of occurrences of all variables if no variable occurs only once. This improves upon the known upper bound of $O^*(1.1279^{(d-2)n})$ by Wahlstr$\ddot{\text{o}}$m (SAT 2005) and $O^*(1.1238^{(d-2)n})$ by Peng and Xiao (IJCAI 2023). For $d\leq 4$, our algorithm is better than previous results. Our main technical result is an algorithm that runs in $O^*(1.1199^n)$ for 3-occur-SAT, a restricted instance of SAT where all variables have at most 3 occurrences. Through deeper case analysis and a reduction rule that allows us to resolve many variables under a relatively broad criteria, we are able to circumvent the bottlenecks in previous algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07389v1-abstract-full').style.display = 'none'; document.getElementById('2411.07389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05747">arXiv:2411.05747</a> <span> [<a href="https://arxiv.org/pdf/2411.05747">pdf</a>, <a href="https://arxiv.org/format/2411.05747">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> WavShadow: Wavelet Based Shadow Segmentation and Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shreyans Jain</a>, <a href="/search/cs?searchtype=author&query=Vekaria%2C+V">Viraj Vekaria</a>, <a href="/search/cs?searchtype=author&query=Gandhi%2C+K">Karan Gandhi</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+A">Aadya Arora</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05747v3-abstract-short" style="display: inline;"> Shadow removal and segmentation remain challenging tasks in computer vision, particularly in complex real world scenarios. This study presents a novel approach that enhances the ShadowFormer model by incorporating Masked Autoencoder (MAE) priors and Fast Fourier Convolution (FFC) blocks, leading to significantly faster convergence and improved performance. 
We introduce key innovations: (1) integra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05747v3-abstract-full').style.display = 'inline'; document.getElementById('2411.05747v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05747v3-abstract-full" style="display: none;"> Shadow removal and segmentation remain challenging tasks in computer vision, particularly in complex real world scenarios. This study presents a novel approach that enhances the ShadowFormer model by incorporating Masked Autoencoder (MAE) priors and Fast Fourier Convolution (FFC) blocks, leading to significantly faster convergence and improved performance. We introduce key innovations: (1) integration of MAE priors trained on Places2 dataset for better context understanding, (2) adoption of Haar wavelet features for enhanced edge detection and multiscale analysis, and (3) implementation of a modified SAM Adapter for robust shadow segmentation. Extensive experiments on the challenging DESOBA dataset demonstrate that our approach achieves state of the art results, with notable improvements in both convergence speed and shadow removal quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05747v3-abstract-full').style.display = 'none'; document.getElementById('2411.05747v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04699">arXiv:2411.04699</a> <span> [<a href="https://arxiv.org/pdf/2411.04699">pdf</a>, <a href="https://arxiv.org/format/2411.04699">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BhasaAnuvaad: A Speech Translation Dataset for 13 Indian Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sparsh Jain</a>, <a href="/search/cs?searchtype=author&query=Sankar%2C+A">Ashwin Sankar</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+D">Devilal Choudhary</a>, <a href="/search/cs?searchtype=author&query=Suman%2C+D">Dhairya Suman</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+N">Nikhil Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+M+S+U+R">Mohammed Safi Ur Rahman Khan</a>, <a href="/search/cs?searchtype=author&query=Kunchukuttan%2C+A">Anoop Kunchukuttan</a>, <a href="/search/cs?searchtype=author&query=Khapra%2C+M+M">Mitesh M Khapra</a>, <a href="/search/cs?searchtype=author&query=Dabre%2C+R">Raj Dabre</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04699v2-abstract-short" style="display: inline;"> Automatic Speech Translation (AST) datasets for Indian languages remain critically scarce, with public resources covering fewer than 10 of the 22 official languages. This scarcity has resulted in AST systems for Indian languages lagging far behind those available for high-resource languages like English. 
In this paper, we first evaluate the performance of widely-used AST systems on Indian language… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04699v2-abstract-full').style.display = 'inline'; document.getElementById('2411.04699v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04699v2-abstract-full" style="display: none;"> Automatic Speech Translation (AST) datasets for Indian languages remain critically scarce, with public resources covering fewer than 10 of the 22 official languages. This scarcity has resulted in AST systems for Indian languages lagging far behind those available for high-resource languages like English. In this paper, we first evaluate the performance of widely-used AST systems on Indian languages, identifying notable performance gaps and challenges. Our findings show that while these systems perform adequately on read speech, they struggle significantly with spontaneous speech, including disfluencies like pauses and hesitations. Additionally, there is a striking absence of systems capable of accurately translating colloquial and informal language, a key aspect of everyday communication. To this end, we introduce BhasaAnuvaad, the largest publicly available dataset for AST involving 13 out of 22 scheduled Indian languages and English spanning over 44,400 hours and 17M text segments. BhasaAnuvaad contains data for English speech to Indic text, as well as Indic speech to English text. This dataset comprises three key categories: (1) Curated datasets from existing resources, (2) Large-scale web mining, and (3) Synthetic data generation. By offering this diverse and expansive dataset, we aim to bridge the resource gap and promote advancements in AST for Indian languages. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04699v2-abstract-full').style.display = 'none'; document.getElementById('2411.04699v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03296">arXiv:2411.03296</a> <span> [<a href="https://arxiv.org/pdf/2411.03296">pdf</a>, <a href="https://arxiv.org/format/2411.03296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> </div> <p class="title is-5 mathjax"> Quantum Communication Advantage in TFNP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=G%C3%B6%C3%B6s%2C+M">Mika Göös</a>, <a href="/search/cs?searchtype=author&query=Gur%2C+T">Tom Gur</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Siddhartha Jain</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiawei Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03296v1-abstract-short" style="display: inline;"> We exhibit a total search problem whose communication 
complexity in the quantum SMP (simultaneous message passing) model is exponentially smaller than in the classical two-way randomized model. Moreover, the quantum protocol is computationally efficient and its solutions are classically verifiable, that is, the problem lies in the communication analogue of the class TFNP. Our problem is a bipartit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03296v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03296v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03296v1-abstract-full" style="display: none;"> We exhibit a total search problem whose communication complexity in the quantum SMP (simultaneous message passing) model is exponentially smaller than in the classical two-way randomized model. Moreover, the quantum protocol is computationally efficient and its solutions are classically verifiable, that is, the problem lies in the communication analogue of the class TFNP. Our problem is a bipartite version of a query complexity problem recently introduced by Yamakawa and Zhandry (JACM 2024). We prove the classical lower bound using the structure-vs-randomness paradigm for analyzing communication protocols. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03296v1-abstract-full').style.display = 'none'; document.getElementById('2411.03296v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02730">arXiv:2411.02730</a> <span> [<a href="https://arxiv.org/pdf/2411.02730">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Natural Language Processing Approach to Support Biomedical Data Harmonization: Leveraging Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zexu Li</a>, <a href="/search/cs?searchtype=author&query=Prabhu%2C+S+P">Suraj P. Prabhu</a>, <a href="/search/cs?searchtype=author&query=Popp%2C+Z+T">Zachary T. Popp</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S+S">Shubhi S. Jain</a>, <a href="/search/cs?searchtype=author&query=Balakundi%2C+V">Vijetha Balakundi</a>, <a href="/search/cs?searchtype=author&query=Ang%2C+T+F+A">Ting Fang Alvin Ang</a>, <a href="/search/cs?searchtype=author&query=Au%2C+R">Rhoda Au</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jinying Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02730v1-abstract-short" style="display: inline;"> Biomedical research requires large, diverse samples to produce unbiased results. Automated methods for matching variables across datasets can accelerate this process. Research in this area has been limited, primarily focusing on lexical matching and ontology based semantic matching. 
We aimed to develop new methods, leveraging large language models (LLM) and ensemble learning, to automate variable… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02730v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02730v1-abstract-full" style="display: none;"> Biomedical research requires large, diverse samples to produce unbiased results. Automated methods for matching variables across datasets can accelerate this process. Research in this area has been limited, primarily focusing on lexical matching and ontology based semantic matching. We aimed to develop new methods, leveraging large language models (LLM) and ensemble learning, to automate variable matching. Methods: We utilized data from two GERAS cohort (European and Japan) studies to develop variable matching methods. We first manually created a dataset by matching 352 EU variables with 1322 candidate JP variables, where matched variable pairs were positive and unmatched pairs were negative instances. Using this dataset, we developed and evaluated two types of natural language processing (NLP) methods, which matched variables based on variable labels and definitions from data dictionaries: (1) LLM-based and (2) fuzzy matching. We then developed an ensemble-learning method, using the Random Forest model, to integrate individual NLP methods. RF was trained and evaluated on 50 trials. Each trial had a random split (4:1) of training and test sets, with the model's hyperparameters optimized through cross-validation on the training set. For each EU variable, 1322 candidate JP variables were ranked based on NLP-derived similarity scores or RF's probability scores, denoting their likelihood to match the EU variable. Ranking performance was measured by top-n hit ratio (HRn) and mean reciprocal rank (MRR). 
Results: E5 performed best among individual methods, achieving 0.90 HR-30 and 0.70 MRR. RF performed better than E5 on all metrics over 50 trials (P less than 0.001) and achieved an average HR 30 of 0.98 and MRR of 0.73. LLM-derived features contributed most to RF's performance. One major cause of errors in automatic variable matching was ambiguous variable definitions within data dictionaries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02730v1-abstract-full').style.display = 'none'; document.getElementById('2411.02730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01099">arXiv:2411.01099</a> <span> [<a href="https://arxiv.org/pdf/2411.01099">pdf</a>, <a href="https://arxiv.org/format/2411.01099">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Few-Class Arena: A Benchmark for Efficient Selection of Vision Models and Dataset Difficulty Measurement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+B+B">Bryan Bo Cao</a>, <a href="/search/cs?searchtype=author&query=O%27Gorman%2C+L">Lawrence O'Gorman</a>, <a href="/search/cs?searchtype=author&query=Coss%2C+M">Michael Coss</a>, <a 
href="/search/cs?searchtype=author&query=Jain%2C+S">Shubham Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01099v1-abstract-short" style="display: inline;"> We propose Few-Class Arena (FCA), as a unified benchmark with focus on testing efficient image classification models for few classes. A wide variety of benchmark datasets with many classes (80-1000) have been created to assist Computer Vision architectural evolution. An increasing number of vision models are evaluated with these many-class datasets. However, real-world applications often involve s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01099v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01099v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01099v1-abstract-full" style="display: none;"> We propose Few-Class Arena (FCA), as a unified benchmark with focus on testing efficient image classification models for few classes. A wide variety of benchmark datasets with many classes (80-1000) have been created to assist Computer Vision architectural evolution. An increasing number of vision models are evaluated with these many-class datasets. However, real-world applications often involve substantially fewer classes of interest (2-10). This gap between many and few classes makes it difficult to predict performance of the few-class applications using models trained on the available many-class datasets. To date, little has been offered to evaluate models in this Few-Class Regime. We conduct a systematic evaluation of the ResNet family trained on ImageNet subsets from 2 to 1000 classes, and test a wide spectrum of Convolutional Neural Networks and Transformer architectures over ten datasets by using our newly proposed FCA tool. 
Furthermore, to aid an up-front assessment of dataset difficulty and a more efficient selection of models, we incorporate a difficulty measure as a function of class similarity. FCA offers a new tool for efficient machine learning in the Few-Class Regime, with goals ranging from a new efficient class similarity proposal, to lightweight model architecture design, to a new scaling law. FCA is user-friendly and can be easily extended to new models and datasets, facilitating future research work. Our benchmark is available at https://github.com/fewclassarena/fca. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01099v1-abstract-full').style.display = 'none'; document.getElementById('2411.01099v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 27 pages including References and Appendix, 20 figures, 5 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T45 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.0; I.4.9 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00369">arXiv:2411.00369</a> <span> [<a href="https://arxiv.org/pdf/2411.00369">pdf</a>, <a href="https://arxiv.org/format/2411.00369">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> GRS-QA -- Graph Reasoning-Structured Question Answering Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pahilajani%2C+A">Anish Pahilajani</a>, <a href="/search/cs?searchtype=author&query=Trivedi%2C+D">Devasha Trivedi</a>, <a href="/search/cs?searchtype=author&query=Shuai%2C+J">Jincen Shuai</a>, <a href="/search/cs?searchtype=author&query=Yone%2C+K+S">Khin S. Yone</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S+R">Samyak Rajesh Jain</a>, <a href="/search/cs?searchtype=author&query=Park%2C+N">Namyong Park</a>, <a href="/search/cs?searchtype=author&query=Rossi%2C+R+A">Ryan A. Rossi</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+N+K">Nesreen K. 
Ahmed</a>, <a href="/search/cs?searchtype=author&query=Dernoncourt%2C+F">Franck Dernoncourt</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00369v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) have excelled in multi-hop question-answering (M-QA) due to their advanced reasoning abilities. However, the impact of the inherent reasoning structures on LLM M-QA performance remains unclear, largely due to the absence of QA datasets that provide fine-grained reasoning structures. To address this gap, we introduce the Graph Reasoning-Structured Question Answering Dat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00369v3-abstract-full').style.display = 'inline'; document.getElementById('2411.00369v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00369v3-abstract-full" style="display: none;"> Large Language Models (LLMs) have excelled in multi-hop question-answering (M-QA) due to their advanced reasoning abilities. However, the impact of the inherent reasoning structures on LLM M-QA performance remains unclear, largely due to the absence of QA datasets that provide fine-grained reasoning structures. To address this gap, we introduce the Graph Reasoning-Structured Question Answering Dataset (GRS-QA), which includes both semantic contexts and reasoning structures for QA pairs. Unlike existing M-QA datasets, where different reasoning structures are entangled together, GRS-QA explicitly captures intricate reasoning pathways by constructing reasoning graphs, where nodes represent textual contexts and edges denote logical flows. 
These reasoning graphs of different structures enable a fine-grained evaluation of LLM reasoning capabilities across various reasoning structures. Our empirical analysis reveals that LLMs perform differently when handling questions with varying reasoning structures. This finding facilitates the exploration of textual structures as compared with semantics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00369v3-abstract-full').style.display = 'none'; document.getElementById('2411.00369v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 24 figures, 10 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23554">arXiv:2410.23554</a> <span> [<a href="https://arxiv.org/pdf/2410.23554">pdf</a>, <a href="https://arxiv.org/format/2410.23554">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Prosody as a Teaching Signal for Agent Learning: Exploratory Studies and Algorithmic Implications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Knierim%2C+M">Matilda Knierim</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sahil 
Jain</a>, <a href="/search/cs?searchtype=author&query=Aydo%C4%9Fan%2C+M+H">Murat Han Aydoğan</a>, <a href="/search/cs?searchtype=author&query=Mitra%2C+K">Kenneth Mitra</a>, <a href="/search/cs?searchtype=author&query=Desai%2C+K">Kush Desai</a>, <a href="/search/cs?searchtype=author&query=Saran%2C+A">Akanksha Saran</a>, <a href="/search/cs?searchtype=author&query=Baraka%2C+K">Kim Baraka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23554v1-abstract-short" style="display: inline;"> Agent learning from human interaction often relies on explicit signals, but implicit social cues, such as prosody in speech, could provide valuable information for more effective learning. This paper advocates for the integration of prosody as a teaching signal to enhance agent learning from human teachers. Through two exploratory studies--one examining voice feedback in an interactive reinforceme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23554v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23554v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23554v1-abstract-full" style="display: none;"> Agent learning from human interaction often relies on explicit signals, but implicit social cues, such as prosody in speech, could provide valuable information for more effective learning. This paper advocates for the integration of prosody as a teaching signal to enhance agent learning from human teachers. Through two exploratory studies--one examining voice feedback in an interactive reinforcement learning setup and the other analyzing restricted audio from human demonstrations in three Atari games--we demonstrate that prosody carries significant information about task dynamics. 
Our findings suggest that prosodic features, when coupled with explicit feedback, can enhance reinforcement learning outcomes. Moreover, we propose guidelines for prosody-sensitive algorithm design and discuss insights into teaching behavior. Our work underscores the potential of leveraging prosody as an implicit signal for more efficient agent learning, thus advancing human-agent interaction paradigms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23554v1-abstract-full').style.display = 'none'; document.getElementById('2410.23554v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at the 26th ACM International Conference on Multimodal Interaction (ICMI) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21276">arXiv:2410.21276</a> <span> [<a href="https://arxiv.org/pdf/2410.21276">pdf</a>, <a href="https://arxiv.org/format/2410.21276">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine 
Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> GPT-4o System Card </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=OpenAI"> OpenAI</a>, <a href="/search/cs?searchtype=author&query=%3A"> :</a>, <a href="/search/cs?searchtype=author&query=Hurst%2C+A">Aaron Hurst</a>, <a href="/search/cs?searchtype=author&query=Lerer%2C+A">Adam Lerer</a>, <a href="/search/cs?searchtype=author&query=Goucher%2C+A+P">Adam P. Goucher</a>, <a href="/search/cs?searchtype=author&query=Perelman%2C+A">Adam Perelman</a>, <a href="/search/cs?searchtype=author&query=Ramesh%2C+A">Aditya Ramesh</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+A">Aidan Clark</a>, <a href="/search/cs?searchtype=author&query=Ostrow%2C+A">AJ Ostrow</a>, <a href="/search/cs?searchtype=author&query=Welihinda%2C+A">Akila Welihinda</a>, <a href="/search/cs?searchtype=author&query=Hayes%2C+A">Alan Hayes</a>, <a href="/search/cs?searchtype=author&query=Radford%2C+A">Alec Radford</a>, <a href="/search/cs?searchtype=author&query=M%C4%85dry%2C+A">Aleksander Mądry</a>, <a href="/search/cs?searchtype=author&query=Baker-Whitcomb%2C+A">Alex Baker-Whitcomb</a>, <a href="/search/cs?searchtype=author&query=Beutel%2C+A">Alex Beutel</a>, <a href="/search/cs?searchtype=author&query=Borzunov%2C+A">Alex Borzunov</a>, <a href="/search/cs?searchtype=author&query=Carney%2C+A">Alex Carney</a>, <a href="/search/cs?searchtype=author&query=Chow%2C+A">Alex Chow</a>, <a href="/search/cs?searchtype=author&query=Kirillov%2C+A">Alex Kirillov</a>, <a href="/search/cs?searchtype=author&query=Nichol%2C+A">Alex Nichol</a>, <a href="/search/cs?searchtype=author&query=Paino%2C+A">Alex Paino</a>, <a href="/search/cs?searchtype=author&query=Renzin%2C+A">Alex 
Renzin</a>, <a href="/search/cs?searchtype=author&query=Passos%2C+A+T">Alex Tachard Passos</a>, <a href="/search/cs?searchtype=author&query=Kirillov%2C+A">Alexander Kirillov</a>, <a href="/search/cs?searchtype=author&query=Christakis%2C+A">Alexi Christakis</a> , et al. (395 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21276v1-abstract-short" style="display: inline;"> GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It's trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 mil… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21276v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21276v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21276v1-abstract-full" style="display: none;"> GPT-4o is an autoregressive omni model that accepts as input any combination of text, audio, image, and video, and generates any combination of text, audio, and image outputs. It's trained end-to-end across text, vision, and audio, meaning all inputs and outputs are processed by the same neural network. GPT-4o can respond to audio inputs in as little as 232 milliseconds, with an average of 320 milliseconds, which is similar to human response time in conversation. It matches GPT-4 Turbo performance on text in English and code, with significant improvement on text in non-English languages, while also being much faster and 50\% cheaper in the API. 
GPT-4o is especially better at vision and audio understanding compared to existing models. In line with our commitment to building AI safely and consistent with our voluntary commitments to the White House, we are sharing the GPT-4o System Card, which includes our Preparedness Framework evaluations. In this System Card, we provide a detailed look at GPT-4o's capabilities, limitations, and safety evaluations across multiple categories, focusing on speech-to-speech while also evaluating text and image capabilities, and measures we've implemented to ensure the model is safe and aligned. We also include third-party assessments on dangerous capabilities, as well as discussion of potential societal impacts of GPT-4o's text and vision capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21276v1-abstract-full').style.display = 'none'; document.getElementById('2410.21276v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17579">arXiv:2410.17579</a> <span> [<a href="https://arxiv.org/pdf/2410.17579">pdf</a>, <a href="https://arxiv.org/format/2410.17579">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Bonsai: Gradient-free Graph Distillation for Node Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Mridul Gupta</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Samyak Jain</a>, <a href="/search/cs?searchtype=author&query=Ramani%2C+V">Vansh Ramani</a>, <a href="/search/cs?searchtype=author&query=Kodamana%2C+H">Hariprasad Kodamana</a>, <a href="/search/cs?searchtype=author&query=Ranu%2C+S">Sayan Ranu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17579v2-abstract-short" style="display: inline;"> Graph distillation has emerged as a promising avenue to enable scalable training of GNNs by compressing the training dataset while preserving essential graph characteristics. Our study uncovers significant shortcomings in current graph distillation techniques. First, the majority of the algorithms paradoxically require training on the full dataset to perform distillation. 
Second, due to their grad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17579v2-abstract-full').style.display = 'inline'; document.getElementById('2410.17579v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17579v2-abstract-full" style="display: none;"> Graph distillation has emerged as a promising avenue to enable scalable training of GNNs by compressing the training dataset while preserving essential graph characteristics. Our study uncovers significant shortcomings in current graph distillation techniques. First, the majority of the algorithms paradoxically require training on the full dataset to perform distillation. Second, due to their gradient-emulating approach, these methods require fresh distillation for any change in hyperparameters or GNN architecture, limiting their flexibility and reusability. Finally, they fail to achieve substantial size reduction due to synthesizing fully-connected, edge-weighted graphs. To address these challenges, we present Bonsai, a novel graph distillation method empowered by the observation that \textit{computation trees} form the fundamental processing units of message-passing GNNs. Bonsai distills datasets by encoding a careful selection of \textit{exemplar} trees that maximize the representation of all computation trees in the training set. This unique approach imparts Bonsai as the first linear-time, model-agnostic graph distillation algorithm for node classification that outperforms existing baselines across $6$ real-world datasets on accuracy, while being $22$ times faster on average. Bonsai is grounded in rigorous mathematical guarantees on the adopted approximation strategies making it robust to GNN architectures, datasets, and parameters. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17579v2-abstract-full').style.display = 'none'; document.getElementById('2410.17579v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14235">arXiv:2410.14235</a> <span> [<a href="https://arxiv.org/pdf/2410.14235">pdf</a>, <a href="https://arxiv.org/format/2410.14235">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust Knowledge Representations in Multilingual LLMs for Equivalence and Inheritance based Consistent Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arora%2C+G">Gaurav Arora</a>, <a href="/search/cs?searchtype=author&query=Merugu%2C+S">Srujana Merugu</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shreya Jain</a>, <a href="/search/cs?searchtype=author&query=Saxena%2C+V">Vaibhav Saxena</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14235v1-abstract-short" style="display: inline;"> Reasoning and linguistic skills form the cornerstone of human intelligence, facilitating problem-solving and decision-making. 
Recent advances in Large Language Models (LLMs) have led to impressive linguistic capabilities and emergent reasoning behaviors, fueling widespread adoption across application domains. However, LLMs still struggle with complex reasoning tasks, highlighting their systemic li… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14235v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14235v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14235v1-abstract-full" style="display: none;"> Reasoning and linguistic skills form the cornerstone of human intelligence, facilitating problem-solving and decision-making. Recent advances in Large Language Models (LLMs) have led to impressive linguistic capabilities and emergent reasoning behaviors, fueling widespread adoption across application domains. However, LLMs still struggle with complex reasoning tasks, highlighting their systemic limitations. In this work, we focus on evaluating whether LLMs have the requisite representations to reason using two foundational relationships: "equivalence" and "inheritance". We introduce novel tasks and benchmarks spanning six languages and observe that current SOTA LLMs often produce conflicting answers to the same questions across languages in 17.3-57.5% of cases and violate inheritance constraints in up to 37.2% cases. To enhance consistency across languages, we propose novel "Compositional Representations" where tokens are represented as composition of equivalent tokens across languages, with resulting conflict reduction (up to -4.7%) indicating benefits of shared LLM representations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14235v1-abstract-full').style.display = 'none'; document.getElementById('2410.14235v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12567">arXiv:2410.12567</a> <span> [<a href="https://arxiv.org/pdf/2410.12567">pdf</a>, <a href="https://arxiv.org/format/2410.12567">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> SeQuiFi: Mitigating Catastrophic Forgetting in Speech Emotion Recognition with Sequential Class-Finetuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sarthak Jain</a>, <a href="/search/cs?searchtype=author&query=Phukan%2C+O+C">Orchid Chetia Phukan</a>, <a href="/search/cs?searchtype=author&query=Behera%2C+S+R">Swarup Ranjan Behera</a>, <a href="/search/cs?searchtype=author&query=Buduru%2C+A+B">Arun Balaji Buduru</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+R">Rajesh Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12567v1-abstract-short" style="display: inline;"> In this work, we introduce SeQuiFi, a novel approach for mitigating catastrophic forgetting (CF) in speech emotion recognition (SER). 
SeQuiFi adopts a sequential class-finetuning strategy, where the model is fine-tuned incrementally on one emotion class at a time, preserving and enhancing retention for each class. While various state-of-the-art (SOTA) methods, such as regularization-based, memory-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12567v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12567v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12567v1-abstract-full" style="display: none;"> In this work, we introduce SeQuiFi, a novel approach for mitigating catastrophic forgetting (CF) in speech emotion recognition (SER). SeQuiFi adopts a sequential class-finetuning strategy, where the model is fine-tuned incrementally on one emotion class at a time, preserving and enhancing retention for each class. While various state-of-the-art (SOTA) methods, such as regularization-based, memory-based, and weight-averaging techniques, have been proposed to address CF, it still remains a challenge, particularly with diverse and multilingual datasets. Through extensive experiments, we demonstrate that SeQuiFi significantly outperforms both vanilla fine-tuning and SOTA continual learning techniques in terms of accuracy and F1 scores on multiple benchmark SER datasets, including CREMA-D, RAVDESS, Emo-DB, MESD, and SHEMO, covering different languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12567v1-abstract-full').style.display = 'none'; document.getElementById('2410.12567v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T45 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11233">arXiv:2410.11233</a> <span> [<a href="https://arxiv.org/pdf/2410.11233">pdf</a>, <a href="https://arxiv.org/format/2410.11233">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3636534.3695903">10.1145/3636534.3695903 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Representation Similarity: A Better Guidance of DNN Layer Sharing for Edge Computing without Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+B+B">Bryan Bo Cao</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Abhinav Sharma</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+M">Manavjeet Singh</a>, <a href="/search/cs?searchtype=author&query=Gandhi%2C+A">Anshul Gandhi</a>, <a href="/search/cs?searchtype=author&query=Das%2C+S">Samir Das</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shubham Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11233v1-abstract-short" style="display: 
inline;"> Edge computing has emerged as an alternative to reduce transmission and processing delay and preserve privacy of the video streams. However, the ever-increasing complexity of Deep Neural Networks (DNNs) used in video-based applications (e.g. object detection) exerts pressure on memory-constrained edge devices. Model merging is proposed to reduce the DNNs' memory footprint by keeping only one copy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11233v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11233v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11233v1-abstract-full" style="display: none;"> Edge computing has emerged as an alternative to reduce transmission and processing delay and preserve privacy of the video streams. However, the ever-increasing complexity of Deep Neural Networks (DNNs) used in video-based applications (e.g. object detection) exerts pressure on memory-constrained edge devices. Model merging is proposed to reduce the DNNs' memory footprint by keeping only one copy of merged layers' weights in memory. In existing model merging techniques, (i) only architecturally identical layers can be shared; (ii) requires computationally expensive retraining in the cloud; (iii) assumes the availability of ground truth for retraining. The re-evaluation of a merged model's performance, however, requires a validation dataset with ground truth, typically runs at the cloud. Common metrics to guide the selection of shared layers include the size or computational cost of shared layers or representation size. We propose a new model merging scheme by sharing representations (i.e., outputs of layers) at the edge, guided by representation similarity S. 
We show that S is extremely highly correlated with merged model's accuracy with Pearson Correlation Coefficient |r| > 0.94 than other metrics, demonstrating that representation similarity can serve as a strong validation accuracy indicator without ground truth. We present our preliminary results of the newly proposed model merging scheme with identified challenges, demonstrating a promising research future direction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11233v1-abstract-full').style.display = 'none'; document.getElementById('2410.11233v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3 pages, 4 figures, ACM MobiCom '24, November 18-22, 2024, Washington D.C., DC, USA</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68M14 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> C.2.4; I.4.0; I.4.9 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06542">arXiv:2410.06542</a> <span> [<a href="https://arxiv.org/pdf/2410.06542">pdf</a>, <a href="https://arxiv.org/format/2410.06542">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MedImageInsight: An Open-Source Embedding 
Model for General Domain Medical Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Codella%2C+N+C+F">Noel C. F. Codella</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Ying Jin</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shrey Jain</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yu Gu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+H+H">Ho Hin Lee</a>, <a href="/search/cs?searchtype=author&query=Abacha%2C+A+B">Asma Ben Abacha</a>, <a href="/search/cs?searchtype=author&query=Santamaria-Pang%2C+A">Alberto Santamaria-Pang</a>, <a href="/search/cs?searchtype=author&query=Guyman%2C+W">Will Guyman</a>, <a href="/search/cs?searchtype=author&query=Sangani%2C+N">Naiteek Sangani</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Poon%2C+H">Hoifung Poon</a>, <a href="/search/cs?searchtype=author&query=Hyland%2C+S">Stephanie Hyland</a>, <a href="/search/cs?searchtype=author&query=Bannur%2C+S">Shruthi Bannur</a>, <a href="/search/cs?searchtype=author&query=Alvarez-Valle%2C+J">Javier Alvarez-Valle</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xue Li</a>, <a href="/search/cs?searchtype=author&query=Garrett%2C+J">John Garrett</a>, <a href="/search/cs?searchtype=author&query=McMillan%2C+A">Alan McMillan</a>, <a href="/search/cs?searchtype=author&query=Rajguru%2C+G">Gaurav Rajguru</a>, <a href="/search/cs?searchtype=author&query=Maddi%2C+M">Madhu Maddi</a>, <a href="/search/cs?searchtype=author&query=Vijayrania%2C+N">Nilesh Vijayrania</a>, <a href="/search/cs?searchtype=author&query=Bhimai%2C+R">Rehaan Bhimai</a>, <a href="/search/cs?searchtype=author&query=Mecklenburg%2C+N">Nick Mecklenburg</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+R">Rupal Jain</a>, <a href="/search/cs?searchtype=author&query=Holstein%2C+D">Daniel Holstein</a>, <a 
href="/search/cs?searchtype=author&query=Gaur%2C+N">Naveen Gaur</a> , et al. (6 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06542v1-abstract-short" style="display: inline;"> In this work, we present MedImageInsight, an open-source medical imaging embedding model. MedImageInsight is trained on medical images with associated text and labels across a diverse collection of domains, including X-Ray, CT, MRI, dermoscopy, OCT, fundus photography, ultrasound, histopathology, and mammography. Rigorous evaluations demonstrate MedImageInsight's ability to achieve state-of-the-ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06542v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06542v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06542v1-abstract-full" style="display: none;"> In this work, we present MedImageInsight, an open-source medical imaging embedding model. MedImageInsight is trained on medical images with associated text and labels across a diverse collection of domains, including X-Ray, CT, MRI, dermoscopy, OCT, fundus photography, ultrasound, histopathology, and mammography. Rigorous evaluations demonstrate MedImageInsight's ability to achieve state-of-the-art (SOTA) or human expert level performance across classification, image-image search, and fine-tuning tasks. Specifically, on public datasets, MedImageInsight achieves SOTA in CT 3D medical image retrieval, as well as SOTA in disease classification and search for chest X-ray, dermatology, and OCT imaging. Furthermore, MedImageInsight achieves human expert performance in bone age estimation (on both public and partner data), as well as AUC above 0.9 in most other domains. 
When paired with a text decoder, MedImageInsight achieves near SOTA level single image report findings generation with less than 10\% the parameters of other models. Compared to fine-tuning GPT-4o with only MIMIC-CXR data for the same task, MedImageInsight outperforms in clinical metrics, but underperforms on lexical metrics where GPT-4o sets a new SOTA. Importantly for regulatory purposes, MedImageInsight can generate ROC curves, adjust sensitivity and specificity based on clinical need, and provide evidence-based decision support through image-image search (which can also enable retrieval augmented generation). In an independent clinical evaluation of image-image search in chest X-ray, MedImageInsight outperformed every other publicly available foundation model evaluated by large margins (over 6 points AUC), and significantly outperformed other models in terms of AI fairness (across age and gender). We hope releasing MedImageInsight will help enhance collective progress in medical imaging AI research and development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06542v1-abstract-full').style.display = 'none'; document.getElementById('2410.06542v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03741">arXiv:2410.03741</a> <span> [<a href="https://arxiv.org/pdf/2410.03741">pdf</a>, <a href="https://arxiv.org/format/2410.03741">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Democratization of Subspeciality Medical Expertise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=O%27Sullivan%2C+J+W">Jack W. O'Sullivan</a>, <a href="/search/cs?searchtype=author&query=Palepu%2C+A">Anil Palepu</a>, <a href="/search/cs?searchtype=author&query=Saab%2C+K">Khaled Saab</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+W">Wei-Hung Weng</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yong Cheng</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+E">Emily Chu</a>, <a href="/search/cs?searchtype=author&query=Desai%2C+Y">Yaanik Desai</a>, <a href="/search/cs?searchtype=author&query=Elezaby%2C+A">Aly Elezaby</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+S">Daniel Seung Kim</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+R">Roy Lan</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+W">Wilson Tang</a>, <a href="/search/cs?searchtype=author&query=Tapaskar%2C+N">Natalie Tapaskar</a>, <a href="/search/cs?searchtype=author&query=Parikh%2C+V">Victoria Parikh</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S+S">Sneha S. 
Jain</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+K">Kavita Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Mansfield%2C+P">Philip Mansfield</a>, <a href="/search/cs?searchtype=author&query=Webster%2C+D">Dale Webster</a>, <a href="/search/cs?searchtype=author&query=Gottweis%2C+J">Juraj Gottweis</a>, <a href="/search/cs?searchtype=author&query=Barral%2C+J">Joelle Barral</a>, <a href="/search/cs?searchtype=author&query=Schaekermann%2C+M">Mike Schaekermann</a>, <a href="/search/cs?searchtype=author&query=Tanno%2C+R">Ryutaro Tanno</a>, <a href="/search/cs?searchtype=author&query=Mahdavi%2C+S+S">S. Sara Mahdavi</a>, <a href="/search/cs?searchtype=author&query=Natarajan%2C+V">Vivek Natarajan</a>, <a href="/search/cs?searchtype=author&query=Karthikesalingam%2C+A">Alan Karthikesalingam</a>, <a href="/search/cs?searchtype=author&query=Ashley%2C+E">Euan Ashley</a> , et al. (1 additional author not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03741v1-abstract-short" style="display: inline;"> The scarcity of subspecialist medical expertise, particularly in rare, complex and life-threatening diseases, poses a significant challenge for healthcare delivery. This issue is particularly acute in cardiology where timely, accurate management determines outcomes. 
We explored the potential of AMIE (Articulate Medical Intelligence Explorer), a large language model (LLM)-based experimental AI syst… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03741v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03741v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03741v1-abstract-full" style="display: none;"> The scarcity of subspecialist medical expertise, particularly in rare, complex and life-threatening diseases, poses a significant challenge for healthcare delivery. This issue is particularly acute in cardiology where timely, accurate management determines outcomes. We explored the potential of AMIE (Articulate Medical Intelligence Explorer), a large language model (LLM)-based experimental AI system optimized for diagnostic dialogue, to potentially augment and support clinical decision-making in this challenging context. We curated a real-world dataset of 204 complex cases from a subspecialist cardiology practice, including results for electrocardiograms, echocardiograms, cardiac MRI, genetic tests, and cardiopulmonary stress tests. We developed a ten-domain evaluation rubric used by subspecialists to evaluate the quality of diagnosis and clinical management plans produced by general cardiologists or AMIE, the latter enhanced with web-search and self-critique capabilities. AMIE was rated superior to general cardiologists for 5 of the 10 domains (with preference ranging from 9% to 20%), and equivalent for the rest. Access to AMIE's response improved cardiologists' overall response quality in 63.7% of cases while lowering quality in just 3.4%. Cardiologists' responses with access to AMIE were superior to cardiologist responses without access to AMIE for all 10 domains. 
Qualitative examinations suggest AMIE and general cardiologist could complement each other, with AMIE thorough and sensitive, while general cardiologist concise and specific. Overall, our results suggest that specialized medical LLMs have the potential to augment general cardiologists' capabilities by bridging gaps in subspecialty expertise, though further research and validation are essential for wide clinical utility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03741v1-abstract-full').style.display = 'none'; document.getElementById('2410.03741v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01790">arXiv:2410.01790</a> <span> [<a href="https://arxiv.org/pdf/2410.01790">pdf</a>, <a href="https://arxiv.org/format/2410.01790">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Open Human-Robot Collaboration using Decentralized Inverse Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Suresh%2C+P+S">Prasanth Sengadu Suresh</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Siddarth Jain</a>, <a href="/search/cs?searchtype=author&query=Doshi%2C+P">Prashant Doshi</a>, <a href="/search/cs?searchtype=author&query=Romeres%2C+D">Diego Romeres</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2410.01790v1-abstract-short" style="display: inline;"> The growing interest in human-robot collaboration (HRC), where humans and robots cooperate towards shared goals, has seen significant advancements over the past decade. While previous research has addressed various challenges, several key issues remain unresolved. Many domains within HRC involve activities that do not necessarily require human presence throughout the entire task. Existing literatu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01790v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01790v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01790v1-abstract-full" style="display: none;"> The growing interest in human-robot collaboration (HRC), where humans and robots cooperate towards shared goals, has seen significant advancements over the past decade. While previous research has addressed various challenges, several key issues remain unresolved. Many domains within HRC involve activities that do not necessarily require human presence throughout the entire task. Existing literature typically models HRC as a closed system, where all agents are present for the entire duration of the task. In contrast, an open model offers flexibility by allowing an agent to enter and exit the collaboration as needed, enabling them to concurrently manage other tasks. In this paper, we introduce a novel multiagent framework called oDec-MDP, designed specifically to model open HRC scenarios where agents can join or leave tasks flexibly during execution. We generalize a recent multiagent inverse reinforcement learning method - Dec-AIRL to learn from open systems modeled using the oDec-MDP. Our method is validated through experiments conducted in both a simplified toy firefighting domain and a realistic dyadic human-robot collaborative assembly. 
Results show that our framework and learning method improves upon its closed system counterpart. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01790v1-abstract-full').style.display = 'none'; document.getElementById('2410.01790v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01627">arXiv:2410.01627</a> <span> [<a href="https://arxiv.org/pdf/2410.01627">pdf</a>, <a href="https://arxiv.org/format/2410.01627">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Intent Detection in the Age of LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arora%2C+G">Gaurav Arora</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shreya Jain</a>, <a href="/search/cs?searchtype=author&query=Merugu%2C+S">Srujana Merugu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01627v1-abstract-short" style="display: inline;"> Intent detection is a critical component of task-oriented dialogue systems (TODS) which enables the identification of suitable actions to address user utterances at each dialog turn. Traditional approaches relied on computationally efficient supervised sentence transformer encoder models, which require substantial training data and struggle with out-of-scope (OOS) detection. 
The emergence of gener… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01627v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01627v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01627v1-abstract-full" style="display: none;"> Intent detection is a critical component of task-oriented dialogue systems (TODS) which enables the identification of suitable actions to address user utterances at each dialog turn. Traditional approaches relied on computationally efficient supervised sentence transformer encoder models, which require substantial training data and struggle with out-of-scope (OOS) detection. The emergence of generative large language models (LLMs) with intrinsic world knowledge presents new opportunities to address these challenges. In this work, we adapt 7 SOTA LLMs using adaptive in-context learning and chain-of-thought prompting for intent detection, and compare their performance with contrastively fine-tuned sentence transformer (SetFit) models to highlight prediction quality and latency tradeoff. We propose a hybrid system using uncertainty based routing strategy to combine the two approaches that along with negative data augmentation results in achieving the best of both worlds (i.e., within 2% of native LLM accuracy with 50% less latency). To better understand LLM OOS detection capabilities, we perform controlled experiments revealing that this capability is significantly influenced by the scope of intent labels and the size of the label space. We also introduce a two-step approach utilizing internal LLM representations, demonstrating empirical gains in OOS detection accuracy and F1-score by >5% for the Mistral-7B model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01627v1-abstract-full').style.display = 'none'; document.getElementById('2410.01627v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2024 Industry Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00812">arXiv:2410.00812</a> <span> [<a href="https://arxiv.org/pdf/2410.00812">pdf</a>, <a href="https://arxiv.org/format/2410.00812">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> A generative framework to bridge data-driven models and scientific theories in language neuroscience </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Antonello%2C+R">Richard Antonello</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+C">Chandan Singh</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shailee Jain</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+A">Aliyah Hsu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jianfeng Gao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Bin Yu</a>, <a href="/search/cs?searchtype=author&query=Huth%2C+A">Alexander Huth</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00812v1-abstract-short" style="display: inline;"> Representations from large language models are highly effective at predicting BOLD fMRI responses to language stimuli. However, these representations are largely opaque: it is unclear what features of the language stimulus drive the response in each brain area. We present generative explanation-mediated validation, a framework for generating concise explanations of language selectivity in the brai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00812v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00812v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00812v1-abstract-full" style="display: none;"> Representations from large language models are highly effective at predicting BOLD fMRI responses to language stimuli. However, these representations are largely opaque: it is unclear what features of the language stimulus drive the response in each brain area. We present generative explanation-mediated validation, a framework for generating concise explanations of language selectivity in the brain and then validating those explanations in follow-up experiments that use synthetic stimuli. This approach is successful at explaining selectivity both in individual voxels and cortical regions of interest (ROIs). We show that explanatory accuracy is closely related to the predictive power and stability of the underlying statistical models. These results demonstrate that LLMs can be used to bridge the widening gap between data-driven models and formal scientific theories. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00812v1-abstract-full').style.display = 'none'; document.getElementById('2410.00812v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00307">arXiv:2410.00307</a> <span> [<a href="https://arxiv.org/pdf/2410.00307">pdf</a>, <a href="https://arxiv.org/format/2410.00307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RadGazeGen: Radiomics and Gaze-guided Medical Image Generation using Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhattacharya%2C+M">Moinak Bhattacharya</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shubham Jain</a>, <a href="/search/cs?searchtype=author&query=Prasanna%2C+P">Prateek Prasanna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00307v1-abstract-short" style="display: inline;"> In this work, we present RadGazeGen, a novel framework for integrating experts' eye gaze patterns and radiomic feature maps as controls to text-to-image diffusion models for high fidelity medical image generation. 
Despite the recent success of text-to-image diffusion models, text descriptions are often found to be inadequate and fail to convey detailed disease-specific information to these models… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00307v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00307v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00307v1-abstract-full" style="display: none;"> In this work, we present RadGazeGen, a novel framework for integrating experts' eye gaze patterns and radiomic feature maps as controls to text-to-image diffusion models for high fidelity medical image generation. Despite the recent success of text-to-image diffusion models, text descriptions are often found to be inadequate and fail to convey detailed disease-specific information to these models to generate clinically accurate images. The anatomy, disease texture patterns, and location of the disease are extremely important to generate realistic images; moreover the fidelity of image generation can have significant implications in downstream tasks involving disease diagnosis or treatment response assessment. Hence, there is a growing need to carefully define the controls used in diffusion models for medical image generation. Eye gaze patterns of radiologists are important visuo-cognitive information, indicative of subtle disease patterns and spatial location. Radiomic features further provide important subvisual cues regarding disease phenotype. In this work, we propose to use these gaze patterns in combination with standard radiomics descriptors, as controls, to generate anatomically correct and disease-aware medical images. RadGazeGen is evaluated for image generation quality and diversity on the REFLACX dataset. 
To demonstrate clinical applicability, we also show classification performance on the generated images from the CheXpert test set (n=500) and long-tailed learning performance on the MIMIC-CXR-LT test set (n=23550). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00307v1-abstract-full').style.display = 'none'; document.getElementById('2410.00307v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14131">arXiv:2409.14131</a> <span> [<a href="https://arxiv.org/pdf/2409.14131">pdf</a>, <a href="https://arxiv.org/format/2409.14131">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Are Music Foundation Models Better at Singing Voice Deepfake Detection? 
Far-Better Fuse them with Speech Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Phukan%2C+O+C">Orchid Chetia Phukan</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sarthak Jain</a>, <a href="/search/cs?searchtype=author&query=Behera%2C+S+R">Swarup Ranjan Behera</a>, <a href="/search/cs?searchtype=author&query=Buduru%2C+A+B">Arun Balaji Buduru</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+R">Rajesh Sharma</a>, <a href="/search/cs?searchtype=author&query=Prasanna%2C+S+R+M">S. R Mahadeva Prasanna</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14131v1-abstract-short" style="display: inline;"> In this study, for the first time, we extensively investigate whether music foundation models (MFMs) or speech foundation models (SFMs) work better for singing voice deepfake detection (SVDD), which has recently attracted attention in the research community. For this, we perform a comprehensive comparative study of state-of-the-art (SOTA) MFMs (MERT variants and music2vec) and SFMs (pre-trained fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14131v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14131v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14131v1-abstract-full" style="display: none;"> In this study, for the first time, we extensively investigate whether music foundation models (MFMs) or speech foundation models (SFMs) work better for singing voice deepfake detection (SVDD), which has recently attracted attention in the research community. 
For this, we perform a comprehensive comparative study of state-of-the-art (SOTA) MFMs (MERT variants and music2vec) and SFMs (pre-trained for general speech representation learning as well as speaker recognition). We show that speaker recognition SFM representations perform the best amongst all the foundation models (FMs), and this performance can be attributed to its higher efficacy in capturing the pitch, tone, intensity, etc, characteristics present in singing voices. To our end, we also explore the fusion of FMs for exploiting their complementary behavior for improved SVDD, and we propose a novel framework, FIONA for the same. With FIONA, through the synchronization of x-vector (speaker recognition SFM) and MERT-v1-330M (MFM), we report the best performance with the lowest Equal Error Rate (EER) of 13.74 %, beating all the individual FMs as well as baseline FM fusions and achieving SOTA results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14131v1-abstract-full').style.display = 'none'; document.getElementById('2409.14131v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP 2025</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T45 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12640">arXiv:2409.12640</a> <span> [<a href="https://arxiv.org/pdf/2409.12640">pdf</a>, <a href="https://arxiv.org/format/2409.12640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Michelangelo: Long Context Evaluations Beyond Haystacks via Latent Structure Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Vodrahalli%2C+K">Kiran Vodrahalli</a>, <a href="/search/cs?searchtype=author&query=Ontanon%2C+S">Santiago Ontanon</a>, <a href="/search/cs?searchtype=author&query=Tripuraneni%2C+N">Nilesh Tripuraneni</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kelvin Xu</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sanil Jain</a>, <a href="/search/cs?searchtype=author&query=Shivanna%2C+R">Rakesh Shivanna</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+J">Jeffrey Hui</a>, <a href="/search/cs?searchtype=author&query=Dikkala%2C+N">Nishanth Dikkala</a>, <a href="/search/cs?searchtype=author&query=Kazemi%2C+M">Mehran Kazemi</a>, <a href="/search/cs?searchtype=author&query=Fatemi%2C+B">Bahare Fatemi</a>, <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a 
href="/search/cs?searchtype=author&query=Dyer%2C+E">Ethan Dyer</a>, <a href="/search/cs?searchtype=author&query=Shakeri%2C+S">Siamak Shakeri</a>, <a href="/search/cs?searchtype=author&query=Vij%2C+R">Roopali Vij</a>, <a href="/search/cs?searchtype=author&query=Mehta%2C+H">Harsh Mehta</a>, <a href="/search/cs?searchtype=author&query=Ramasesh%2C+V">Vinay Ramasesh</a>, <a href="/search/cs?searchtype=author&query=Le%2C+Q">Quoc Le</a>, <a href="/search/cs?searchtype=author&query=Chi%2C+E">Ed Chi</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yifeng Lu</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&query=Lespiau%2C+J">Jean-Baptiste Lespiau</a>, <a href="/search/cs?searchtype=author&query=Attaluri%2C+N">Nithya Attaluri</a>, <a href="/search/cs?searchtype=author&query=Olszewska%2C+K">Kate Olszewska</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12640v2-abstract-short" style="display: inline;"> We introduce Michelangelo: a minimal, synthetic, and unleaked long-context reasoning evaluation for large language models which is also easy to automatically score. This evaluation is derived via a novel, unifying framework for evaluations over arbitrarily long contexts which measure the model's ability to do more than retrieve a single piece of information from its context. 
The central idea of th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12640v2-abstract-full').style.display = 'inline'; document.getElementById('2409.12640v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12640v2-abstract-full" style="display: none;"> We introduce Michelangelo: a minimal, synthetic, and unleaked long-context reasoning evaluation for large language models which is also easy to automatically score. This evaluation is derived via a novel, unifying framework for evaluations over arbitrarily long contexts which measure the model's ability to do more than retrieve a single piece of information from its context. The central idea of the Latent Structure Queries framework (LSQ) is to construct tasks which require a model to “chisel away” the irrelevant information in the context, revealing a latent structure in the context. To verify a model's understanding of this latent structure, we query the model for details of the structure. Using LSQ, we produce three diagnostic long-context evaluations across code and natural-language domains intended to provide a stronger signal of long-context language model capabilities. We perform evaluations on several state-of-the-art models and demonstrate both that a) the proposed evaluations are high-signal and b) that there is significant room for improvement in synthesizing long-context information. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12640v2-abstract-full').style.display = 'none'; document.getElementById('2409.12640v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10126">arXiv:2409.10126</a> <span> [<a href="https://arxiv.org/pdf/2409.10126">pdf</a>, <a href="https://arxiv.org/format/2409.10126">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> </div> </div> <p class="title is-5 mathjax"> Data-free Non-intrusive Model Reduction for Nonlinear Finite Element Models via Spectral Submanifolds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+M">Mingwu Li</a>, <a href="/search/cs?searchtype=author&query=Thurnher%2C+T">Thomas Thurnher</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhenwei Xu</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shobhit Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10126v1-abstract-short" style="display: inline;"> The theory of spectral submanifolds 
(SSMs) has emerged as a powerful tool for constructing rigorous, low-dimensional reduced-order models (ROMs) of high-dimensional nonlinear mechanical systems. A direct computation of SSMs requires explicit knowledge of nonlinear coefficients in the equations of motion, which limits their applicability to generic finite-element (FE) solvers. Here, we propose a no… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10126v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10126v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10126v1-abstract-full" style="display: none;"> The theory of spectral submanifolds (SSMs) has emerged as a powerful tool for constructing rigorous, low-dimensional reduced-order models (ROMs) of high-dimensional nonlinear mechanical systems. A direct computation of SSMs requires explicit knowledge of nonlinear coefficients in the equations of motion, which limits their applicability to generic finite-element (FE) solvers. Here, we propose a non-intrusive algorithm for the computation of the SSMs and the associated ROMs up to arbitrary polynomial orders. This non-intrusive algorithm only requires system nonlinearity as a black box and hence, enables SSM-based model reduction via generic finite-element software. Our expressions and algorithms are valid for systems with up to cubic-order nonlinearities, including velocity-dependent nonlinear terms, asymmetric damping, and stiffness matrices, and hence work for a large class of mechanics problems. We demonstrate the effectiveness of the proposed non-intrusive approach over a variety of FE examples of increasing complexity, including a micro-resonator FE model containing more than a million degrees of freedom. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10126v1-abstract-full').style.display = 'none'; document.getElementById('2409.10126v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08495">arXiv:2409.08495</a> <span> [<a href="https://arxiv.org/pdf/2409.08495">pdf</a>, <a href="https://arxiv.org/format/2409.08495">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Consumable Data via Quantum Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gilboa%2C+D">Dar Gilboa</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Siddhartha Jain</a>, <a href="/search/cs?searchtype=author&query=McClean%2C+J">Jarrod McClean</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08495v2-abstract-short" style="display: inline;"> Classical data can be copied and re-used for computation, with adverse consequences economically and in terms of data privacy. Motivated by this, we formulate problems in one-way communication complexity where Alice holds some data and Bob holds $m$ inputs, and he wants to compute $m$ instances of a bipartite relation on Alice's data and each of his inputs. 
We call this the asymmetric direct sum q… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08495v2-abstract-full').style.display = 'inline'; document.getElementById('2409.08495v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08495v2-abstract-full" style="display: none;"> Classical data can be copied and re-used for computation, with adverse consequences economically and in terms of data privacy. Motivated by this, we formulate problems in one-way communication complexity where Alice holds some data and Bob holds $m$ inputs, and he wants to compute $m$ instances of a bipartite relation on Alice's data and each of his inputs. We call this the asymmetric direct sum question for one-way communication. We give a number of examples where the quantum communication complexity of such problems scales polynomially with $m$, while the classical communication complexity depends at most logarithmically on $m$. For these examples, data behaves like a consumable resource when the owner stores and transmits it as quantum states. We show an application to a strategic data-selling game, and discuss other potential economic implications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08495v2-abstract-full').style.display = 'none'; document.getElementById('2409.08495v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06493">arXiv:2409.06493</a> <span> [<a href="https://arxiv.org/pdf/2409.06493">pdf</a>, <a href="https://arxiv.org/format/2409.06493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Elucidating Optimal Reward-Diversity Tradeoffs in Text-to-Image Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jena%2C+R">Rohit Jena</a>, <a href="/search/cs?searchtype=author&query=Taghibakhshi%2C+A">Ali Taghibakhshi</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sahil Jain</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+G">Gerald Shen</a>, <a href="/search/cs?searchtype=author&query=Tajbakhsh%2C+N">Nima Tajbakhsh</a>, <a href="/search/cs?searchtype=author&query=Vahdat%2C+A">Arash Vahdat</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06493v1-abstract-short" style="display: inline;"> Text-to-image (T2I) diffusion models have become prominent tools for generating high-fidelity images from text prompts. However, when trained on unfiltered internet data, these models can produce unsafe, incorrect, or stylistically undesirable images that are not aligned with human preferences. 
To address this, recent approaches have incorporated human preference datasets to fine-tune T2I models o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06493v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06493v1-abstract-full" style="display: none;"> Text-to-image (T2I) diffusion models have become prominent tools for generating high-fidelity images from text prompts. However, when trained on unfiltered internet data, these models can produce unsafe, incorrect, or stylistically undesirable images that are not aligned with human preferences. To address this, recent approaches have incorporated human preference datasets to fine-tune T2I models or to optimize reward functions that capture these preferences. Although effective, these methods are vulnerable to reward hacking, where the model overfits to the reward function, leading to a loss of diversity in the generated images. In this paper, we prove the inevitability of reward hacking and study natural regularization techniques like KL divergence and LoRA scaling, and their limitations for diffusion models. We also introduce Annealed Importance Guidance (AIG), an inference-time regularization inspired by Annealed Importance Sampling, which retains the diversity of the base model while achieving Pareto-Optimal reward-diversity tradeoffs. Our experiments demonstrate the benefits of AIG for Stable Diffusion models, striking the optimal balance between reward optimization and image diversity. Furthermore, a user study confirms that AIG improves diversity and quality of generated images across different model architectures and reward functions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06493v1-abstract-full').style.display = 'none'; document.getElementById('2409.06493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04652">arXiv:2409.04652</a> <span> [<a href="https://arxiv.org/pdf/2409.04652">pdf</a>, <a href="https://arxiv.org/format/2409.04652">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Privacy-Preserving Race/Ethnicity Estimation for Algorithmic Bias Measurement in the U.S </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Badrinarayanan%2C+S">Saikrishna Badrinarayanan</a>, <a href="/search/cs?searchtype=author&query=Osoba%2C+O">Osonde Osoba</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Miao Cheng</a>, <a href="/search/cs?searchtype=author&query=Rogers%2C+R">Ryan Rogers</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sakshi Jain</a>, <a href="/search/cs?searchtype=author&query=Tandra%2C+R">Rahul Tandra</a>, <a href="/search/cs?searchtype=author&query=Pillai%2C+N+S">Natesh S. 
Pillai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.04652v2-abstract-short" style="display: inline;"> AI fairness measurements, including tests for equal treatment, often take the form of disaggregated evaluations of AI systems. Such measurements are an important part of Responsible AI operations. These measurements compare system performance across demographic groups or sub-populations and typically require member-level demographic signals such as gender, race, ethnicity, and location. However, s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04652v2-abstract-full').style.display = 'inline'; document.getElementById('2409.04652v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.04652v2-abstract-full" style="display: none;"> AI fairness measurements, including tests for equal treatment, often take the form of disaggregated evaluations of AI systems. Such measurements are an important part of Responsible AI operations. These measurements compare system performance across demographic groups or sub-populations and typically require member-level demographic signals such as gender, race, ethnicity, and location. However, sensitive member-level demographic attributes like race and ethnicity can be challenging to obtain and use due to platform choices, legal constraints, and cultural norms. In this paper, we focus on the task of enabling AI fairness measurements on race/ethnicity for <em>U.S. LinkedIn members</em> in a privacy-preserving manner. We present the Privacy-Preserving Probabilistic Race/Ethnicity Estimation (PPRE) method for performing this task. 
PPRE combines the Bayesian Improved Surname Geocoding (BISG) model, a sparse LinkedIn survey sample of self-reported demographics, and privacy-enhancing technologies like secure two-party computation and differential privacy to enable meaningful fairness measurements while preserving member privacy. We provide details of the PPRE method and its privacy guarantees. We then illustrate sample measurement operations. We conclude with a review of open research and engineering challenges for expanding our privacy-preserving fairness measurement capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04652v2-abstract-full').style.display = 'none'; document.getElementById('2409.04652v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Saikrishna Badrinarayanan and Osonde Osoba contributed equally to this work. 
Updating text to indicate limitations of sample analyses</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04649">arXiv:2409.04649</a> <span> [<a href="https://arxiv.org/pdf/2409.04649">pdf</a>, <a href="https://arxiv.org/format/2409.04649">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Preserving Individuality while Following the Crowd: Understanding the Role of User Taste and Crowd Wisdom in Online Product Rating Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shubham Jain</a>, <a href="/search/cs?searchtype=author&query=Dou%2C+Y">Yingtong Dou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junpeng Wang</a>, <a href="/search/cs?searchtype=author&query=Yeh%2C+C+M">Chin-Chia Michael Yeh</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Y">Yujie Fan</a>, <a href="/search/cs?searchtype=author&query=Aboagye%2C+P">Prince Aboagye</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yan Zheng</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+X">Xin Dai</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+Z">Zhongfang Zhuang</a>, <a href="/search/cs?searchtype=author&query=Saini%2C+U+S">Uday Singh Saini</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.04649v1-abstract-short" style="display: inline;"> Numerous 
algorithms have been developed for online product rating prediction, but the specific influence of user and product information in determining the final prediction score remains largely unexplored. Existing research often relies on narrowly defined data settings, which overlooks real-world challenges such as the cold-start problem, cross-category information utilization, and scalability a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04649v1-abstract-full').style.display = 'inline'; document.getElementById('2409.04649v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.04649v1-abstract-full" style="display: none;"> Numerous algorithms have been developed for online product rating prediction, but the specific influence of user and product information in determining the final prediction score remains largely unexplored. Existing research often relies on narrowly defined data settings, which overlooks real-world challenges such as the cold-start problem, cross-category information utilization, and scalability and deployment issues. To delve deeper into these aspects, and particularly to uncover the roles of individual user taste and collective wisdom, we propose a unique and practical approach that emphasizes historical ratings at both the user and product levels, encapsulated using a continuously updated dynamic tree representation. This representation effectively captures the temporal dynamics of users and products, leverages user information across product categories, and provides a natural solution to the cold-start problem. Furthermore, we have developed an efficient data processing strategy that makes this approach highly scalable and easily deployable. Comprehensive experiments in real industry settings demonstrate the effectiveness of our approach. 
Notably, our findings reveal that individual taste dominates over collective wisdom in online product rating prediction, a perspective that contrasts with the commonly observed wisdom of the crowd phenomenon in other domains. This dominance of individual user taste is consistent across various model types, including the boosting tree model, recurrent neural network (RNN), and transformer-based architectures. This observation holds true across the overall population, within individual product categories, and in cold-start scenarios. Our findings underscore the significance of individual user tastes in the context of online product rating prediction and the robustness of our approach across different model architectures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04649v1-abstract-full').style.display = 'none'; document.getElementById('2409.04649v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02122">arXiv:2409.02122</a> <span> [<a href="https://arxiv.org/pdf/2409.02122">pdf</a>, <a href="https://arxiv.org/format/2409.02122">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Deep Knowledge-Infusion For Explainable Depression Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dalal%2C+S">Sumit Dalal</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sarika Jain</a>, <a href="/search/cs?searchtype=author&query=Dave%2C+M">Mayank Dave</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02122v1-abstract-short" style="display: inline;"> Discovering individuals depression on social media has become increasingly important. Researchers employed ML/DL or lexicon-based methods for automated depression detection. Lexicon based methods, explainable and easy to implement, match words from user posts in a depression dictionary without considering contexts. 
While the DL models can leverage contextual information, their black-box nature lim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02122v1-abstract-full').style.display = 'inline'; document.getElementById('2409.02122v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02122v1-abstract-full" style="display: none;"> Discovering individuals depression on social media has become increasingly important. Researchers employed ML/DL or lexicon-based methods for automated depression detection. Lexicon based methods, explainable and easy to implement, match words from user posts in a depression dictionary without considering contexts. While the DL models can leverage contextual information, their black-box nature limits their adoption in the domain. Though surrogate models like LIME and SHAP can produce explanations for DL models, the explanations are suitable for the developer and of limited use to the end user. We propose a Knowledge-infused Neural Network (KiNN) incorporating domain-specific knowledge from DepressionFeature ontology (DFO) in a neural network to endow the model with user-level explainability regarding concepts and processes the clinician understands. Further, commonsense knowledge from the Commonsense Transformer (COMET) trained on ATOMIC is also infused to consider the generic emotional aspects of user posts in depression detection. The model is evaluated on three expertly curated datasets related to depression. We observed the model to have a statistically significant (p&lt;0.1) boost in performance over the best domain-specific model, MentalBERT, across CLEF e-Risk (25% MCC increase, 12% F1 increase). A similar trend is observed across the PRIMATE dataset, where the proposed model performed better than MentalBERT (2.5% MCC increase, 19% F1 increase). 
The observations confirm the generated explanations to be informative for MHPs compared to post hoc model explanations. Results demonstrated that the user-level explainability of KiNN also surpasses the performance of baseline models and can provide explanations where other baselines fall short. Infusing the domain and commonsense knowledge in KiNN enhances the ability of models like GPT-3.5 to generate application-relevant explanations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02122v1-abstract-full').style.display = 'none'; document.getElementById('2409.02122v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16125">arXiv:2408.16125</a> <span> [<a href="https://arxiv.org/pdf/2408.16125">pdf</a>, <a href="https://arxiv.org/format/2408.16125">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> DECAF: a Discrete-Event based Collaborative Human-Robot Framework for Furniture Assembly </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Giacomuzzo%2C+G">Giulio Giacomuzzo</a>, <a href="/search/cs?searchtype=author&query=Terreran%2C+M">Matteo Terreran</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Siddarth Jain</a>, <a 
href="/search/cs?searchtype=author&query=Romeres%2C+D">Diego Romeres</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16125v1-abstract-short" style="display: inline;"> This paper proposes a task planning framework for collaborative Human-Robot scenarios, specifically focused on assembling complex systems such as furniture. The human is characterized as an uncontrollable agent, implying for example that the agent is not bound by a pre-established sequence of actions and instead acts according to its own preferences. Meanwhile, the task planner computes reactively… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16125v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16125v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16125v1-abstract-full" style="display: none;"> This paper proposes a task planning framework for collaborative Human-Robot scenarios, specifically focused on assembling complex systems such as furniture. The human is characterized as an uncontrollable agent, implying for example that the agent is not bound by a pre-established sequence of actions and instead acts according to its own preferences. Meanwhile, the task planner computes reactively the optimal actions for the collaborative robot to efficiently complete the entire assembly task in the least time possible. We formalize the problem as a Discrete Event Markov Decision Problem (DE-MDP), a comprehensive framework that incorporates a variety of asynchronous behaviors, human change of mind and failure recovery as stochastic events. Although the problem could theoretically be addressed by constructing a graph of all possible actions, such an approach would be constrained by computational limitations. 
The proposed formulation offers an alternative solution utilizing Reinforcement Learning to derive an optimal policy for the robot. Experiments were conducted both in simulation and on a real system with human subjects assembling a chair in collaboration with a 7-DoF manipulator. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16125v1-abstract-full').style.display = 'none'; document.getElementById('2408.16125v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 6 figures, extended version of accepted paper at IROS24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12752">arXiv:2408.12752</a> <span> [<a href="https://arxiv.org/pdf/2408.12752">pdf</a>, <a href="https://arxiv.org/format/2408.12752">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Number Theory">math.NT</span> </div> </div> <p class="title is-5 mathjax"> High-distance codes with transversal Clifford and T-gates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S+P">Shubham P. Jain</a>, <a href="/search/cs?searchtype=author&query=Albert%2C+V+V">Victor V. 
Albert</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12752v2-abstract-short" style="display: inline;"> The non-local interactions in several quantum devices allow for the realization of more compact quantum encodings while retaining the same degree of protection against noise. Anticipating that short to medium-length codes will soon be realizable, it is important to construct stabilizer codes that, for a given code distance, admit fault-tolerant implementations of logical gates with the fewest numb… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12752v2-abstract-full').style.display = 'inline'; document.getElementById('2408.12752v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12752v2-abstract-full" style="display: none;"> The non-local interactions in several quantum devices allow for the realization of more compact quantum encodings while retaining the same degree of protection against noise. Anticipating that short to medium-length codes will soon be realizable, it is important to construct stabilizer codes that, for a given code distance, admit fault-tolerant implementations of logical gates with the fewest number of physical qubits. We extract high-distance doubly even codes from the quantum quadratic-residue code family that admit a transversal implementation of the single-qubit Clifford group and block transversal implementation of the full Clifford group. Applying a doubling procedure [arXiv:1509.03239] to such codes yields a family of high-distance weak triply even codes which admit a transversal implementation of the logical $\texttt{T}$-gate. 
Relaxing the triply even property, we also obtain a family of triorthogonal codes which requires an even lower overhead at the cost of additional Clifford gates to achieve the same logical operation. To our knowledge, our doubly even and triorthogonal families are the shortest qubit stabilizer codes of the same distance that can realize their respective gates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12752v2-abstract-full').style.display = 'none'; document.getElementById('2408.12752v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2 tables, 3 figures. Updated version: Includes a family of triorthogonal codes with improved parameters. 
Includes a more in-depth discussion of T-gate code families</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11058">arXiv:2408.11058</a> <span> [<a href="https://arxiv.org/pdf/2408.11058">pdf</a>, <a href="https://arxiv.org/format/2408.11058">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> LLM Agents Improve Semantic Code Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sarthak Jain</a>, <a href="/search/cs?searchtype=author&query=Dora%2C+A">Aditya Dora</a>, <a href="/search/cs?searchtype=author&query=Sam%2C+K+S">Ka Seng Sam</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+P">Prabhat Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11058v1-abstract-short" style="display: inline;"> Code Search is a key task that many programmers often have to perform while developing solutions to problems. Current methodologies suffer from an inability to perform accurately on prompts that contain some ambiguity or ones that require additional context relative to a code-base. 
We introduce the approach of using Retrieval Augmented Generation (RAG) powered agents to inject information into use… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11058v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11058v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11058v1-abstract-full" style="display: none;"> Code Search is a key task that many programmers often have to perform while developing solutions to problems. Current methodologies suffer from an inability to perform accurately on prompts that contain some ambiguity or ones that require additional context relative to a code-base. We introduce the approach of using Retrieval Augmented Generation (RAG) powered agents to inject information into user prompts allowing for better inputs into embedding models. By utilizing RAG, agents enhance user queries with relevant details from GitHub repositories, making them more informative and contextually aligned. Additionally, we introduce a multi-stream ensemble approach which when paired with agentic workflow can obtain improved retrieval accuracy, which we deploy on an application called repo-rift.com. Experimental results on the CodeSearchNet dataset demonstrate that RepoRift significantly outperforms existing methods, achieving a 78.2% success rate at Success@10 and a 34.6% success rate at Success@1. This research presents a substantial advancement in semantic code search, highlighting the potential of agentic LLMs and RAG to enhance code retrieval systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11058v1-abstract-full').style.display = 'none'; document.getElementById('2408.11058v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 1 Figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08808">arXiv:2408.08808</a> <span> [<a href="https://arxiv.org/pdf/2408.08808">pdf</a>, <a href="https://arxiv.org/format/2408.08808">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Constructing Domain-Specific Evaluation Sets for LLM-as-a-judge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Raju%2C+R">Ravi Raju</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Swayambhoo Jain</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bo Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jonathan Li</a>, <a href="/search/cs?searchtype=author&query=Thakker%2C+U">Urmish Thakker</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08808v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) have revolutionized the landscape of 
machine learning, yet current benchmarks often fall short in capturing the diverse behavior of these models in real-world applications. A benchmark's usefulness is determined by its ability to clearly differentiate between models of varying capabilities (separability) and closely align with human preferences. Existing frameworks lik… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08808v3-abstract-full').style.display = 'inline'; document.getElementById('2408.08808v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08808v3-abstract-full" style="display: none;"> Large Language Models (LLMs) have revolutionized the landscape of machine learning, yet current benchmarks often fall short in capturing the diverse behavior of these models in real-world applications. A benchmark's usefulness is determined by its ability to clearly differentiate between models of varying capabilities (separability) and closely align with human preferences. Existing frameworks like Alpaca-Eval 2.0 LC \cite{dubois2024lengthcontrolledalpacaevalsimpleway} and Arena-Hard v0.1 \cite{li2024crowdsourced} are limited by their focus on general-purpose queries and lack of diversity across domains such as law, medicine, and multilingual contexts. In this paper, we address these limitations by introducing a novel data pipeline that curates diverse, domain-specific evaluation sets tailored for LLM-as-a-Judge frameworks. Our approach leverages a combination of manual curation, semi-supervised learning to generate clusters, and stratified sampling to ensure balanced representation across a wide range of domains and languages. The resulting evaluation set, which includes 1573 samples across 14 categories, demonstrates high separability (84\%) across ten top-ranked models, and agreement (84\%) with Chatbot Arena and (0.915) Spearman correlation. 
The agreement values are 9\% better than Arena Hard and 20\% better than AlpacaEval 2.0 LC, while the Spearman coefficient is 0.7 more than the next best benchmark, showcasing a significant improvement in the usefulness of the benchmark. We further provide an open-source evaluation tool that enables fine-grained analysis of model performance across user-defined categories, offering valuable insights for practitioners. This work contributes to the ongoing effort to enhance the transparency, diversity, and effectiveness of LLM evaluation methodologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08808v3-abstract-full').style.display = 'none'; document.getElementById('2408.08808v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 8 figures, Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08477">arXiv:2408.08477</a> <span> [<a href="https://arxiv.org/pdf/2408.08477">pdf</a>, <a href="https://arxiv.org/format/2408.08477">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Automating Transparency Mechanisms in the Judicial System Using LLMs: Opportunities and Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shastri%2C+I">Ishana Shastri</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shomik Jain</a>, <a href="/search/cs?searchtype=author&query=Engelhardt%2C+B">Barbara Engelhardt</a>, <a href="/search/cs?searchtype=author&query=Wilson%2C+A">Ashia Wilson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08477v1-abstract-short" style="display: inline;"> Bringing more transparency to the judicial system for the purposes of increasing accountability often demands extensive effort from auditors who must meticulously sift through numerous disorganized legal case files to detect patterns of bias and errors. 
For example, the high-profile investigation into the Curtis Flowers case took seven reporters a full year to assemble evidence about the prosecuto… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08477v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08477v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08477v1-abstract-full" style="display: none;"> Bringing more transparency to the judicial system for the purposes of increasing accountability often demands extensive effort from auditors who must meticulously sift through numerous disorganized legal case files to detect patterns of bias and errors. For example, the high-profile investigation into the Curtis Flowers case took seven reporters a full year to assemble evidence about the prosecutor's history of selecting racially biased juries. LLMs have the potential to automate and scale these transparency pipelines, especially given their demonstrated capabilities to extract information from unstructured documents. We discuss the opportunities and challenges of using LLMs to provide transparency in two important court processes: jury selection in criminal trials and housing eviction cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08477v1-abstract-full').style.display = 'none'; document.getElementById('2408.08477v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the Seventh AAAI/ACM Conference on AI, Ethics, and Society (AIES 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07892">arXiv:2408.07892</a> <span> [<a href="https://arxiv.org/pdf/2408.07892">pdf</a>, <a href="https://arxiv.org/format/2408.07892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Personhood credentials: Artificial intelligence and the value of privacy-preserving tools to distinguish who is real online </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Adler%2C+S">Steven Adler</a>, <a href="/search/cs?searchtype=author&query=Hitzig%2C+Z">Zoë Hitzig</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shrey Jain</a>, <a href="/search/cs?searchtype=author&query=Brewer%2C+C">Catherine Brewer</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+W">Wayne Chang</a>, <a href="/search/cs?searchtype=author&query=DiResta%2C+R">Renée DiResta</a>, <a href="/search/cs?searchtype=author&query=Lazzarin%2C+E">Eddy Lazzarin</a>, <a href="/search/cs?searchtype=author&query=McGregor%2C+S">Sean McGregor</a>, <a href="/search/cs?searchtype=author&query=Seltzer%2C+W">Wendy Seltzer</a>, <a href="/search/cs?searchtype=author&query=Siddarth%2C+D">Divya Siddarth</a>, <a href="/search/cs?searchtype=author&query=Soliman%2C+N">Nouran Soliman</a>, <a href="/search/cs?searchtype=author&query=South%2C+T">Tobin South</a>, <a href="/search/cs?searchtype=author&query=Spelliscy%2C+C">Connor Spelliscy</a>, <a href="/search/cs?searchtype=author&query=Sporny%2C+M">Manu Sporny</a>, <a 
href="/search/cs?searchtype=author&query=Srivastava%2C+V">Varya Srivastava</a>, <a href="/search/cs?searchtype=author&query=Bailey%2C+J">John Bailey</a>, <a href="/search/cs?searchtype=author&query=Christian%2C+B">Brian Christian</a>, <a href="/search/cs?searchtype=author&query=Critch%2C+A">Andrew Critch</a>, <a href="/search/cs?searchtype=author&query=Falcon%2C+R">Ronnie Falcon</a>, <a href="/search/cs?searchtype=author&query=Flanagan%2C+H">Heather Flanagan</a>, <a href="/search/cs?searchtype=author&query=Duffy%2C+K+H">Kim Hamilton Duffy</a>, <a href="/search/cs?searchtype=author&query=Ho%2C+E">Eric Ho</a>, <a href="/search/cs?searchtype=author&query=Leibowicz%2C+C+R">Claire R. Leibowicz</a>, <a href="/search/cs?searchtype=author&query=Nadhamuni%2C+S">Srikanth Nadhamuni</a>, <a href="/search/cs?searchtype=author&query=Rozenshtein%2C+A+Z">Alan Z. Rozenshtein</a> , et al. (7 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07892v3-abstract-short" style="display: inline;"> Anonymity is an important principle online. However, malicious actors have long used misleading identities to conduct fraud, spread disinformation, and carry out other deceptive schemes. With the advent of increasingly capable AI, bad actors can amplify the potential scale and effectiveness of their operations, intensifying the challenge of balancing anonymity and trustworthiness online. In this p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07892v3-abstract-full').style.display = 'inline'; document.getElementById('2408.07892v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07892v3-abstract-full" style="display: none;"> Anonymity is an important principle online. 
However, malicious actors have long used misleading identities to conduct fraud, spread disinformation, and carry out other deceptive schemes. With the advent of increasingly capable AI, bad actors can amplify the potential scale and effectiveness of their operations, intensifying the challenge of balancing anonymity and trustworthiness online. In this paper, we analyze the value of a new tool to address this challenge: "personhood credentials" (PHCs), digital credentials that empower users to demonstrate that they are real people -- not AIs -- to online services, without disclosing any personal information. Such credentials can be issued by a range of trusted institutions -- governments or otherwise. A PHC system, according to our definition, could be local or global, and does not need to be biometrics-based. Two trends in AI contribute to the urgency of the challenge: AI's increasing indistinguishability from people online (i.e., lifelike content and avatars, agentic activity), and AI's increasing scalability (i.e., cost-effectiveness, accessibility). Drawing on a long history of research into anonymous credentials and "proof-of-personhood" systems, personhood credentials give people a way to signal their trustworthiness on online platforms, and offer service providers new tools for reducing misuse by bad actors. In contrast, existing countermeasures to automated deception -- such as CAPTCHAs -- are inadequate against sophisticated AI, while stringent identity verification solutions are insufficiently private for many use-cases. After surveying the benefits of personhood credentials, we also examine deployment risks and design challenges. We conclude with actionable next steps for policymakers, technologists, and standards bodies to consider in consultation with the public. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07892v3-abstract-full').style.display = 'none'; document.getElementById('2408.07892v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">63 pages, 7 figures, 5 tables; minor additions to acknowledgments and wording changes for clarity; corrected typo</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17512">arXiv:2407.17512</a> <span> [<a href="https://arxiv.org/pdf/2407.17512">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TGCN.2024.3396162">10.1109/TGCN.2024.3396162 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Green and Safe 6G Wireless Networks: A Hybrid Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kour%2C+H">Haneet Kour</a>, <a href="/search/cs?searchtype=author&query=Jha%2C+R+K">Rakesh Kumar Jha</a>, <a 
href="/search/cs?searchtype=author&query=Jain%2C+S">Sanjeev Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17512v1-abstract-short" style="display: inline;"> With the wireless internet access being increasingly popular with services such as HD video streaming and so on, the demand for high data consuming applications is also rising. This increment in demand is coupled with a proportional rise in the power consumption. It is required that the internet traffic is offloaded to technologies that serve the users and contribute in energy consumption. There i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17512v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17512v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17512v1-abstract-full" style="display: none;"> With the wireless internet access being increasingly popular with services such as HD video streaming and so on, the demand for high data consuming applications is also rising. This increment in demand is coupled with a proportional rise in the power consumption. It is required that the internet traffic is offloaded to technologies that serve the users and contribute in energy consumption. There is a need to decrease the carbon footprint in the atmosphere and also make the network safe and reliable. In this article we propose a hybrid system of RF (Radio Frequency) and VLC (Visible Light Communication) for indoor communication that can provide communication along with illumination with least power consumption. The hybrid network is viable as it utilizes power with respect to the user demand and maintains the required Quality of ServiceQoS and Quality of Experience QoE for a particular application in use. 
This scheme aims for Green Communication and reduction in Electromagnetic EM Radiation. A comparative analysis for RF communication, Hybrid RF+ VLC and pure VLC is made and simulations are carried out using Python, Scilab and MathWorks tool. The proposal achieves high energy efficiency of about 37% low Specific Absorption Rate (SAR) lower incident and absorbed power density complexity and temperature elevation in human body tissues exposed to the radiation. It also enhances the battery lifetime of the mobile device in use by increasing the lifetime approximately by 7 hours as validated from the obtained results. Thus the overall network reliability and safety factor is enhanced with the proposed approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17512v1-abstract-full').style.display = 'none'; document.getElementById('2407.17512v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Green Communications and Networking 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17511">arXiv:2407.17511</a> <span> [<a href="https://arxiv.org/pdf/2407.17511">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MPOT.2021.3091077">10.1109/MPOT.2021.3091077 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Thermal Radiation (TR) mode: A Deployment Perspective for 5G NR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kour%2C+H">Haneet Kour</a>, <a href="/search/cs?searchtype=author&query=Jha%2C+R+K">Rakesh Kumar Jha</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sanjeev Jain</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shubha Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17511v1-abstract-short" style="display: inline;"> The 5G New Radio NR technology is under standardization process by 3GPP to provide outline for a new radio interface for the next generation of cellular networks. 
The aim of the 5G networks include not only to provide enhanced capacity coverage but also support advanced services such as enhanced mobile broadband (eMBB) Ultra-Reliable Low Latency Communication URLLC massive Machine Type Communicati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17511v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17511v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17511v1-abstract-full" style="display: none;"> The 5G New Radio NR technology is under standardization process by 3GPP to provide outline for a new radio interface for the next generation of cellular networks. The aim of the 5G networks include not only to provide enhanced capacity coverage but also support advanced services such as enhanced mobile broadband (eMBB) Ultra-Reliable Low Latency Communication URLLC massive Machine Type Communication mMTC. Key features of NR include Ultra lean carrier design to minimize the power consumption by limiting the always-on signal transmissions and to reduce interference in the neighboring cells . Another feature is the use of massive number of antennas for transmission as well as reception of signals. This rise in the number of antennas to provide a greater coverage brings about various challenges and impact in the system. With the increase in investigations in the mmWave frequencies, there is a need to investigate the health hazards they have on human body and the environment at large. This paper intends to provide an insight into the harmful impacts of Radio Frequency RF fields. The radiation metric to study the RF impact for far field is power density and for near field is Specific Absorption Rate SAR. These are the two main EM radiation metrics to find out the exposure due to uplink and downlink phenomenon in mobile communications. 
Mobile communication systems are addressed particularly to discuss the Electromagnetic EM Radiation impact as smart phones are used in close proximity to the body. A proposal in the form of Thermal Radiation TR mode is given to reduce the radiations emitted from a mobile phone. The performance of the proposed mode is validated from the results by achieving reduced power density, complexity and exposure ratio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17511v1-abstract-full').style.display = 'none'; document.getElementById('2407.17511v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Potentials, 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17498">arXiv:2407.17498</a> <span> [<a href="https://arxiv.org/pdf/2407.17498">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TGCN.2023.3303471">10.1109/TGCN.2023.3303471 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Antenna Model for Safe Human Exposure in Future 6G Smartphones: A Network Perspective </p> <p class="authors"> 
<span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kour%2C+H">Haneet Kour</a>, <a href="/search/cs?searchtype=author&query=Jha%2C+R+K">Rakesh Kumar Jha</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sanjeev Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17498v1-abstract-short" style="display: inline;"> In this article we present the biological effect of antenna topology on a users body. At different values of exposed frequency, the absorbent nature varies in human body. One of the major factors to be taken into consideration for designing 6G mobile antenna is the biological effect and Electromagnetic Field Exposure (EMF). </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17498v1-abstract-full" style="display: none;"> In this article we present the biological effect of antenna topology on a users body. At different values of exposed frequency, the absorbent nature varies in human body. One of the major factors to be taken into consideration for designing 6G mobile antenna is the biological effect and Electromagnetic Field Exposure (EMF). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17498v1-abstract-full').style.display = 'none'; document.getElementById('2407.17498v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> 10.1109/TGCN.2023.3303471 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Green Communications and Networking, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16805">arXiv:2407.16805</a> <span> [<a href="https://arxiv.org/pdf/2407.16805">pdf</a>, <a href="https://arxiv.org/format/2407.16805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> TAMIGO: Empowering Teaching Assistants using LLM-assisted viva and code assessment in an Advanced Computing Class </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=IIITD%2C+A">Anishka IIITD</a>, <a href="/search/cs?searchtype=author&query=Sethi%2C+D">Diksha Sethi</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+N">Nipun Gupta</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+S">Shikhar Sharma</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Srishti Jain</a>, <a href="/search/cs?searchtype=author&query=Singhal%2C+U">Ujjwal Singhal</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+D">Dhruv Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16805v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have significantly transformed the educational landscape, offering new tools for students, instructors, and teaching 
assistants. This paper investigates the application of LLMs in assisting teaching assistants (TAs) with viva and code assessments in an advanced computing class on distributed systems in an Indian University. We develop TAMIGO, an LLM-based system for TA… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16805v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16805v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have significantly transformed the educational landscape, offering new tools for students, instructors, and teaching assistants. This paper investigates the application of LLMs in assisting teaching assistants (TAs) with viva and code assessments in an advanced computing class on distributed systems in an Indian University. We develop TAMIGO, an LLM-based system for TAs to evaluate programming assignments. For viva assessment, the TAs generated questions using TAMIGO and circulated these questions to the students for answering. The TAs then used TAMIGO to generate feedback on student answers. For code assessment, the TAs selected specific code blocks from student code submissions and fed it to TAMIGO to generate feedback for these code blocks. The TAMIGO-generated feedback for student answers and code blocks was used by the TAs for further evaluation. We evaluate the quality of LLM-generated viva questions, model answers, feedback on viva answers, and feedback on student code submissions. Our results indicate that LLMs are highly effective at generating viva questions when provided with sufficient context and background information. However, the results for LLM-generated feedback on viva answers were mixed; instances of hallucination occasionally reduced the accuracy of feedback. 
Despite this, the feedback was consistent, constructive, comprehensive, balanced, and did not overwhelm the TAs. Similarly, for code submissions, the LLM-generated feedback was constructive, comprehensive and balanced, though there was room for improvement in aligning the feedback with the instructor-provided rubric for code evaluation. Our findings contribute to understanding the benefits and limitations of integrating LLMs into educational settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16805v1-abstract-full').style.display = 'none'; document.getElementById('2407.16805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14105">arXiv:2407.14105</a> <span> [<a href="https://arxiv.org/pdf/2407.14105">pdf</a>, <a href="https://arxiv.org/format/2407.14105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> </div> </div> <p class="title is-5 mathjax"> Quasi-Isometric Reductions Between Infinite Strings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Celine%2C+K+F">Karen Frilya Celine</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Ziyuan Gao</a>, <a 
href="/search/cs?searchtype=author&query=Jain%2C+S">Sanjay Jain</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+R">Ryan Lou</a>, <a href="/search/cs?searchtype=author&query=Stephan%2C+F">Frank Stephan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+G">Guohua Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14105v1-abstract-short" style="display: inline;"> This paper studies the recursion-theoretic aspects of large-scale geometries of infinite strings, a subject initiated by Khoussainov and Takisaka (2017). We investigate several notions of quasi-isometric reductions between recursive infinite strings and prove various results on the equivalence classes of such reductions. The main result is the construction of two infinite recursive strings $α$ and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14105v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14105v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14105v1-abstract-full" style="display: none;"> This paper studies the recursion-theoretic aspects of large-scale geometries of infinite strings, a subject initiated by Khoussainov and Takisaka (2017). We investigate several notions of quasi-isometric reductions between recursive infinite strings and prove various results on the equivalence classes of such reductions. The main result is the construction of two infinite recursive strings $α$ and $β$ such that $α$ is strictly quasi-isometrically reducible to $β$, but the reduction cannot be made recursive. This answers an open problem posed by Khoussainov and Takisaka. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14105v1-abstract-full').style.display = 'none'; document.getElementById('2407.14105v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10264">arXiv:2407.10264</a> <span> [<a href="https://arxiv.org/pdf/2407.10264">pdf</a>, <a href="https://arxiv.org/format/2407.10264">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> What Makes and Breaks Safety Fine-tuning? A Mechanistic Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Samyak Jain</a>, <a href="/search/cs?searchtype=author&query=Lubana%2C+E+S">Ekdeep Singh Lubana</a>, <a href="/search/cs?searchtype=author&query=Oksuz%2C+K">Kemal Oksuz</a>, <a href="/search/cs?searchtype=author&query=Joy%2C+T">Tom Joy</a>, <a href="/search/cs?searchtype=author&query=Torr%2C+P+H+S">Philip H. S. Torr</a>, <a href="/search/cs?searchtype=author&query=Sanyal%2C+A">Amartya Sanyal</a>, <a href="/search/cs?searchtype=author&query=Dokania%2C+P+K">Puneet K. 
Dokania</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10264v3-abstract-short" style="display: inline;"> Safety fine-tuning helps align Large Language Models (LLMs) with human preferences for their safe deployment. To better understand the underlying factors that make models safe via safety fine-tuning, we design a synthetic data generation framework that captures salient aspects of an unsafe input by modeling the interaction between the task the model is asked to perform (e.g., "design") versus the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10264v3-abstract-full').style.display = 'inline'; document.getElementById('2407.10264v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10264v3-abstract-full" style="display: none;"> Safety fine-tuning helps align Large Language Models (LLMs) with human preferences for their safe deployment. To better understand the underlying factors that make models safe via safety fine-tuning, we design a synthetic data generation framework that captures salient aspects of an unsafe input by modeling the interaction between the task the model is asked to perform (e.g., "design") versus the specific concepts the task is asked to be performed upon (e.g., a "cycle" vs. a "bomb"). Using this, we investigate three well-known safety fine-tuning methods -- supervised safety fine-tuning, direct preference optimization, and unlearning -- and provide significant evidence demonstrating that these methods minimally transform MLP weights to specifically align unsafe inputs into its weights' null space. This yields a clustering of inputs based on whether the model deems them safe or not. 
Correspondingly, when an adversarial input (e.g., a jailbreak) is provided, its activations are closer to safer samples, leading to the model processing such an input as if it were safe. We validate our findings, wherever possible, on real-world models -- specifically, Llama-2 7B and Llama-3 8B. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10264v3-abstract-full').style.display = 'none'; document.getElementById('2407.10264v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09473">arXiv:2407.09473</a> <span> [<a href="https://arxiv.org/pdf/2407.09473">pdf</a>, <a href="https://arxiv.org/format/2407.09473">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> StyleSplat: 3D Object Style Transfer with Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sahil Jain</a>, <a href="/search/cs?searchtype=author&query=Kuthiala%2C+A">Avik Kuthiala</a>, <a href="/search/cs?searchtype=author&query=Sethi%2C+P+S">Prabhdeep Singh Sethi</a>, <a href="/search/cs?searchtype=author&query=Saxena%2C+P">Prakanshul Saxena</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09473v1-abstract-short" style="display: inline;"> Recent advancements in radiance fields have opened new avenues for creating high-quality 3D assets and scenes. Style transfer can enhance these 3D assets with diverse artistic styles, transforming creative expression. However, existing techniques are often slow or unable to localize style transfer to specific objects. We introduce StyleSplat, a lightweight method for stylizing 3D objects in scenes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09473v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09473v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09473v1-abstract-full" style="display: none;"> Recent advancements in radiance fields have opened new avenues for creating high-quality 3D assets and scenes. Style transfer can enhance these 3D assets with diverse artistic styles, transforming creative expression. However, existing techniques are often slow or unable to localize style transfer to specific objects. We introduce StyleSplat, a lightweight method for stylizing 3D objects in scenes represented by 3D Gaussians from reference style images. Our approach first learns a photorealistic representation of the scene using 3D Gaussian splatting while jointly segmenting individual 3D objects. We then use a nearest-neighbor feature matching loss to finetune the Gaussians of the selected objects, aligning their spherical harmonic coefficients with the style image to ensure consistency and visual appeal. StyleSplat allows for quick, customizable style transfer and localized stylization of multiple objects within a scene, each with a different style. 
We demonstrate its effectiveness across various 3D scenes and styles, showcasing enhanced control and customization in 3D creation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09473v1-abstract-full').style.display = 'none'; document.getElementById('2407.09473v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">for code and results, see http://bernard0047.github.io/stylesplat</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04302">arXiv:2407.04302</a> <span> [<a href="https://arxiv.org/pdf/2407.04302">pdf</a>, <a href="https://arxiv.org/format/2407.04302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fair Federated Data Clustering through Personalization: Bridging the Gap between Diverse Data Distributions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+S">Shivam Gupta</a>, <a href="/search/cs?searchtype=author&query=Tarushi"> Tarushi</a>, <a href="/search/cs?searchtype=author&query=Wangzes%2C+T">Tsering Wangzes</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shweta Jain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04302v2-abstract-short" style="display: inline;"> The rapid growth of 
data from edge devices has catalyzed the performance of machine learning algorithms. However, the data generated resides at client devices thus there are majorly two challenge faced by traditional machine learning paradigms - centralization of data for training and secondly for most the generated data the class labels are missing and there is very poor incentives to clients to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04302v2-abstract-full').style.display = 'inline'; document.getElementById('2407.04302v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04302v2-abstract-full" style="display: none;"> The rapid growth of data from edge devices has catalyzed the performance of machine learning algorithms. However, the data generated resides at client devices thus there are majorly two challenge faced by traditional machine learning paradigms - centralization of data for training and secondly for most the generated data the class labels are missing and there is very poor incentives to clients to manually label their data owing to high cost and lack of expertise. To overcome these issues, there have been initial attempts to handle unlabelled data in a privacy preserving distributed manner using unsupervised federated data clustering. The goal is partition the data available on clients into $k$ partitions (called clusters) without actual exchange of data. Most of the existing algorithms are highly dependent on data distribution patterns across clients or are computationally expensive. Furthermore, due to presence of skewed nature of data across clients in most of practical scenarios existing models might result in clients suffering high clustering cost making them reluctant to participate in federated process. To this, we are first to introduce the idea of personalization in federated clustering. 
The goal is achieve balance between achieving lower clustering cost and at same time achieving uniform cost across clients. We propose p-FClus that addresses these goal in a single round of communication between server and clients. We validate the efficacy of p-FClus against variety of federated datasets showcasing it's data independence nature, applicability to any finite $\ell$-norm, while simultaneously achieving lower cost and variance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04302v2-abstract-full').style.display = 'none'; document.getElementById('2407.04302v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02624">arXiv:2407.02624</a> <span> [<a href="https://arxiv.org/pdf/2407.02624">pdf</a>, <a href="https://arxiv.org/format/2407.02624">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Information Access in Networks via Edge Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhaskara%2C+A">Aditya Bhaskara</a>, <a href="/search/cs?searchtype=author&query=Crane%2C+A">Alex Crane</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shweta Jain</a>, <a 
href="/search/cs?searchtype=author&query=Mazumder%2C+M+M+H+U">Md Mumtahin Habib Ullah Mazumder</a>, <a href="/search/cs?searchtype=author&query=Sullivan%2C+B+D">Blair D. Sullivan</a>, <a href="/search/cs?searchtype=author&query=Yalamanchili%2C+P">Prasanth Yalamanchili</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02624v2-abstract-short" style="display: inline;"> Given a graph $G = (V, E)$ and a model of information flow on that network, a fundamental question is to understand whether all nodes have sufficient access to information generated at other nodes in the graph. If not, we can ask if a small set of interventions in the form of edge additions improve information access. Formally, the broadcast value of a network is defined to be the minimum over pai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02624v2-abstract-full').style.display = 'inline'; document.getElementById('2407.02624v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02624v2-abstract-full" style="display: none;"> Given a graph $G = (V, E)$ and a model of information flow on that network, a fundamental question is to understand whether all nodes have sufficient access to information generated at other nodes in the graph. If not, we can ask if a small set of interventions in the form of edge additions improve information access. Formally, the broadcast value of a network is defined to be the minimum over pairs $u,v \in V$ of the probability that an information cascade starting at $u$ reaches $v$. Having a high broadcast value ensures that every node has sufficient access to information spreading in a network, thus quantifying fairness of access. 
In this paper, we formally study the Broadcast Improvement problem: given $G$ and a parameter $k$, the goal is to find the best set of $k$ edges to add to $G$ in order to maximize the broadcast value of the resulting graph. We develop efficient approximation algorithms for this problem. If the optimal solution adds $k$ edges and achieves a broadcast of $β^*$, we develop algorithms that can (a) add $k$ edges and achieve a broadcast value roughly $(β^*)^4/16^k$, or (b) add $O(k\log n)$ edges and achieve a broadcast roughly $β^*$. We also provide other trade-offs that can be better depending on the parameter values. Our algorithms rely on novel probabilistic tools to reason about the existence of paths in edge-sampled graphs, and extend to a single-source variant of the problem, where we obtain analogous algorithmic results. We complement our results by proving that unless P = NP, any algorithm that adds $O(k)$ edges must lose significantly in the approximation of $β^*$, resolving an open question from prior work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02624v2-abstract-full').style.display = 'none'; document.getElementById('2407.02624v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Version 2 adds a new single-criteria approximation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19314">arXiv:2406.19314</a> <span> [<a href="https://arxiv.org/pdf/2406.19314">pdf</a>, <a href="https://arxiv.org/format/2406.19314">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LiveBench: A Challenging, Contamination-Free LLM Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=White%2C+C">Colin White</a>, <a href="/search/cs?searchtype=author&query=Dooley%2C+S">Samuel Dooley</a>, <a href="/search/cs?searchtype=author&query=Roberts%2C+M">Manley Roberts</a>, <a href="/search/cs?searchtype=author&query=Pal%2C+A">Arka Pal</a>, <a href="/search/cs?searchtype=author&query=Feuer%2C+B">Ben Feuer</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Siddhartha Jain</a>, <a href="/search/cs?searchtype=author&query=Shwartz-Ziv%2C+R">Ravid Shwartz-Ziv</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+N">Neel Jain</a>, <a href="/search/cs?searchtype=author&query=Saifullah%2C+K">Khalid Saifullah</a>, <a href="/search/cs?searchtype=author&query=Naidu%2C+S">Siddartha Naidu</a>, <a href="/search/cs?searchtype=author&query=Hegde%2C+C">Chinmay Hegde</a>, <a href="/search/cs?searchtype=author&query=LeCun%2C+Y">Yann LeCun</a>, <a 
href="/search/cs?searchtype=author&query=Goldstein%2C+T">Tom Goldstein</a>, <a href="/search/cs?searchtype=author&query=Neiswanger%2C+W">Willie Neiswanger</a>, <a href="/search/cs?searchtype=author&query=Goldblum%2C+M">Micah Goldblum</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19314v1-abstract-short" style="display: inline;"> Test set contamination, wherein test data from a benchmark ends up in a newer model's training set, is a well-documented obstacle for fair LLM evaluation and can quickly render benchmarks obsolete. To mitigate this, many recent benchmarks crowdsource new prompts and evaluations from human or LLM judges; however, these can introduce significant biases, and break down when scoring hard questions. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19314v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19314v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19314v1-abstract-full" style="display: none;"> Test set contamination, wherein test data from a benchmark ends up in a newer model's training set, is a well-documented obstacle for fair LLM evaluation and can quickly render benchmarks obsolete. To mitigate this, many recent benchmarks crowdsource new prompts and evaluations from human or LLM judges; however, these can introduce significant biases, and break down when scoring hard questions. In this work, we introduce a new benchmark for LLMs designed to be immune to both test set contamination and the pitfalls of LLM judging and human crowdsourcing. 
We release LiveBench, the first benchmark that (1) contains frequently-updated questions from recent information sources, (2) scores answers automatically according to objective ground-truth values, and (3) contains a wide variety of challenging tasks, spanning math, coding, reasoning, language, instruction following, and data analysis. To achieve this, LiveBench contains questions that are based on recently-released math competitions, arXiv papers, news articles, and datasets, and it contains harder, contamination-free versions of tasks from previous benchmarks such as Big-Bench Hard, AMPS, and IFEval. We evaluate many prominent closed-source models, as well as dozens of open-source models ranging from 0.5B to 110B in size. LiveBench is difficult, with top models achieving below 65% accuracy. We release all questions, code, and model answers. Questions will be added and updated on a monthly basis, and we will release new tasks and harder versions of tasks over time so that LiveBench can distinguish between the capabilities of LLMs as they improve in the future. We welcome community engagement and collaboration for expanding the benchmark tasks and models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19314v1-abstract-full').style.display = 'none'; document.getElementById('2406.19314v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17975">arXiv:2406.17975</a> <span> [<a href="https://arxiv.org/pdf/2406.17975">pdf</a>, <a href="https://arxiv.org/ps/2406.17975">ps</a>, <a href="https://arxiv.org/format/2406.17975">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SoK: Membership Inference Attacks on LLMs are Rushing Nowhere (and How to Fix It) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meeus%2C+M">Matthieu Meeus</a>, <a href="/search/cs?searchtype=author&query=Shilov%2C+I">Igor Shilov</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Shubham Jain</a>, <a href="/search/cs?searchtype=author&query=Faysse%2C+M">Manuel Faysse</a>, <a href="/search/cs?searchtype=author&query=Rei%2C+M">Marek Rei</a>, <a href="/search/cs?searchtype=author&query=de+Montjoye%2C+Y">Yves-Alexandre de Montjoye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17975v2-abstract-short" style="display: inline;"> Whether LLMs memorize their training data and what this means, from privacy leakage to detecting copyright violations -- has become a rapidly growing area of research over the last two years. In recent months, more than 10 new methods have been proposed to perform Membership Inference Attacks (MIAs) against LLMs. 
Contrary to traditional MIAs which rely on fixed -- but randomized -- records or mode… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17975v2-abstract-full').style.display = 'inline'; document.getElementById('2406.17975v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17975v2-abstract-full" style="display: none;"> Whether LLMs memorize their training data and what this means, from privacy leakage to detecting copyright violations -- has become a rapidly growing area of research over the last two years. In recent months, more than 10 new methods have been proposed to perform Membership Inference Attacks (MIAs) against LLMs. Contrary to traditional MIAs which rely on fixed -- but randomized -- records or models, these methods are mostly evaluated on datasets collected post-hoc. Sets of members and non-members, used to evaluate the MIA, are constructed using informed guesses after the release of a model. This lack of randomization raises concerns of a distribution shift between members and non-members. In the first part, we review the literature on MIAs against LLMs. While most work focuses on sequence-level MIAs evaluated in post-hoc setups, we show that a range of target models, motivations and units of interest have been considered in the literature. We then quantify distribution shifts present in the 6 datasets used in the literature, ranging from books to papers, using a bag of word classifier. Our analysis reveals that all of them suffer from severe distribution shifts. This challenges the validity of using such setups to measure LLM memorization and may undermine the benchmarking of recently proposed methods. Yet, all hope might not be lost. 
In the second part, we introduce important considerations to properly evaluate MIAs against LLMs and discuss potential ways forward: randomized test splits, injections of randomized (unique) sequences, randomized finetuning, and post-hoc control methods. While each option comes with its advantages and limitations, we believe they collectively provide solid grounds to guide the development of MIA methods and study LLM memorization. We conclude by proposing comprehensive, easy-to-use benchmarks for sequence- and document-level MIAs against LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17975v2-abstract-full').style.display = 'none'; document.getElementById('2406.17975v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16846">arXiv:2406.16846</a> <span> [<a href="https://arxiv.org/pdf/2406.16846">pdf</a>, <a href="https://arxiv.org/format/2406.16846">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Data Debiasing with Datamodels (D3M): Improving Subgroup Robustness via Data Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jain%2C+S">Saachi Jain</a>, <a href="/search/cs?searchtype=author&query=Hamidieh%2C+K">Kimia Hamidieh</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+K">Kristian Georgiev</a>, <a href="/search/cs?searchtype=author&query=Ilyas%2C+A">Andrew Ilyas</a>, <a href="/search/cs?searchtype=author&query=Ghassemi%2C+M">Marzyeh Ghassemi</a>, <a href="/search/cs?searchtype=author&query=Madry%2C+A">Aleksander Madry</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16846v1-abstract-short" style="display: inline;"> Machine learning models can fail on subgroups that are underrepresented during training. While techniques such as dataset balancing can improve performance on underperforming groups, they require access to training group annotations and can end up removing large portions of the dataset. 
In this paper, we introduce Data Debiasing with Datamodels (D3M), a debiasing approach which isolates and remove… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16846v1-abstract-full').style.display = 'inline'; document.getElementById('2406.16846v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16846v1-abstract-full" style="display: none;"> Machine learning models can fail on subgroups that are underrepresented during training. While techniques such as dataset balancing can improve performance on underperforming groups, they require access to training group annotations and can end up removing large portions of the dataset. In this paper, we introduce Data Debiasing with Datamodels (D3M), a debiasing approach which isolates and removes specific training examples that drive the model's failures on minority groups. Our approach enables us to efficiently train debiased classifiers while removing only a small number of examples, and does not require training group annotations or additional hyperparameter tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16846v1-abstract-full').style.display = 'none'; document.getElementById('2406.16846v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15468">arXiv:2406.15468</a> <span> [<a href="https://arxiv.org/pdf/2406.15468">pdf</a>, <a href="https://arxiv.org/format/2406.15468">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MMLU-SR: A Benchmark for Stress-Testing Reasoning Capability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wentian Wang</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sarthak Jain</a>, <a href="/search/cs?searchtype=author&query=Kantor%2C+P">Paul Kantor</a>, <a href="/search/cs?searchtype=author&query=Feldman%2C+J">Jacob Feldman</a>, <a href="/search/cs?searchtype=author&query=Gallos%2C+L">Lazaros Gallos</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15468v2-abstract-short" style="display: inline;"> We propose MMLU-SR, a novel dataset designed to measure the true comprehension abilities of Large Language Models (LLMs) by challenging their performance in question-answering tasks with modified terms. 
We reasoned that an agent that "truly" understands a concept can still evaluate it when key terms are replaced by suitably defined alternate terms, and sought to differentiate such comprehension fr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15468v2-abstract-full').style.display = 'inline'; document.getElementById('2406.15468v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15468v2-abstract-full" style="display: none;"> We propose MMLU-SR, a novel dataset designed to measure the true comprehension abilities of Large Language Models (LLMs) by challenging their performance in question-answering tasks with modified terms. We reasoned that an agent that "truly" understands a concept can still evaluate it when key terms are replaced by suitably defined alternate terms, and sought to differentiate such comprehension from mere text replacement. In our study, we modified standardized test questions by replacing a key term with a dummy word along with its definition. The key term could be in the context of questions, answers, or both questions and answers. Notwithstanding the high scores achieved by recent popular LLMs on the MMLU leaderboard, we found a substantial reduction in model performance after such replacement, suggesting poor comprehension. This new benchmark provides a rigorous benchmark for testing true model comprehension, and poses a challenge to the broader scientific community. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15468v2-abstract-full').style.display = 'none'; document.getElementById('2406.15468v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13383">arXiv:2406.13383</a> <span> [<a href="https://arxiv.org/pdf/2406.13383">pdf</a>, <a href="https://arxiv.org/format/2406.13383">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cellular Automata and Lattice Gases">nlin.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Emergent Dynamics in Heterogeneous Life-Like Cellular Automata </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shrestha%2C+A">Aarati Shrestha</a>, <a href="/search/cs?searchtype=author&query=Reimers%2C+F">Felix Reimers</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+S">Sanyam Jain</a>, <a href="/search/cs?searchtype=author&query=Baldini%2C+P">Paolo Baldini</a>, <a href="/search/cs?searchtype=author&query=Braccini%2C+M">Michele Braccini</a>, <a href="/search/cs?searchtype=author&query=Roli%2C+A">Andrea Roli</a>, <a href="/search/cs?searchtype=author&query=Nichele%2C+S">Stefano Nichele</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2406.13383v1-abstract-short" style="display: inline;"> The Game of Life (GoL), one well known 2D cellular automaton, does not typically ensure interesting long-term phenotypic dynamics. Therefore, while being Turing complete, GoL cannot be said to be open-ended. In this work, we extend GoL with the opportunity for local mutations, thus enabling a heterogeneous life-like cellular automaton guided by an evolutionary inner loop. Additionally, we introduc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13383v1-abstract-full').style.display = 'inline'; document.getElementById('2406.13383v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13383v1-abstract-full" style="display: none;"> The Game of Life (GoL), one well known 2D cellular automaton, does not typically ensure interesting long-term phenotypic dynamics. Therefore, while being Turing complete, GoL cannot be said to be open-ended. In this work, we extend GoL with the opportunity for local mutations, thus enabling a heterogeneous life-like cellular automaton guided by an evolutionary inner loop. Additionally, we introduce the concept of cell ageing to ensure that cell aliveness (activated by inheritance with variation, and controlled by ageing) and actual cell computation (governed by life-like rules on local neighborhoods) are kept conceptually separated. We conduct an experimental campaign to identify suitable parameters that produce long-term phenotypic dynamics and favor genotypic innovations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13383v1-abstract-full').style.display = 'none'; document.getElementById('2406.13383v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 Figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jain%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jain%2C+S&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" 
aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p 
class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 
0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository