Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='https://static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 154 results for author: <span class="mathjax">Deshpande, A</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. 
<a href="/search/?searchtype=author&query=Deshpande%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Deshpande, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Deshpande%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Deshpande, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02172">arXiv:2410.02172</a> <span> [<a href="https://arxiv.org/pdf/2410.02172">pdf</a>, <a href="https://arxiv.org/format/2410.02172">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span 
class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Abstract Reward Processes: Leveraging State Abstraction for Consistent Off-Policy Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chaudhari%2C+S">Shreyas Chaudhari</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=da+Silva%2C+B+C">Bruno Castro da Silva</a>, <a href="/search/cs?searchtype=author&query=Thomas%2C+P+S">Philip S. Thomas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02172v1-abstract-short" style="display: inline;"> Evaluating policies using off-policy data is crucial for applying reinforcement learning to real-world problems such as healthcare and autonomous driving. Previous methods for off-policy evaluation (OPE) generally suffer from high variance or irreducible bias, leading to unacceptably high prediction errors. In this work, we introduce STAR, a framework for OPE that encompasses a broad range of esti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02172v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02172v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02172v1-abstract-full" style="display: none;"> Evaluating policies using off-policy data is crucial for applying reinforcement learning to real-world problems such as healthcare and autonomous driving. 
Previous methods for off-policy evaluation (OPE) generally suffer from high variance or irreducible bias, leading to unacceptably high prediction errors. In this work, we introduce STAR, a framework for OPE that encompasses a broad range of estimators -- which include existing OPE methods as special cases -- that achieve lower mean squared prediction errors. STAR leverages state abstraction to distill complex, potentially continuous problems into compact, discrete models which we call abstract reward processes (ARPs). Predictions from ARPs estimated from off-policy data are provably consistent (asymptotically correct). Rather than proposing a specific estimator, we present a new framework for OPE and empirically demonstrate that estimators within STAR outperform existing methods. The best STAR estimator outperforms baselines in all twelve cases studied, and even the median STAR estimator surpasses the baselines in seven out of the twelve cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02172v1-abstract-full').style.display = 'none'; document.getElementById('2410.02172v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the Thirty-eighth Annual Conference on Neural Information Processing Systems (NeurIPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18416">arXiv:2407.18416</a> <span> [<a href="https://arxiv.org/pdf/2407.18416">pdf</a>, <a href="https://arxiv.org/format/2407.18416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PersonaGym: Evaluating Persona Agents and LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Samuel%2C+V">Vinay Samuel</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+H+P">Henry Peng Zou</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yue Zhou</a>, <a href="/search/cs?searchtype=author&query=Chaudhari%2C+S">Shreyas Chaudhari</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18416v2-abstract-short" style="display: 
inline;"> Persona agents, which are LLM agents that act according to an assigned persona, have demonstrated impressive contextual response capabilities across various applications. These persona agents offer significant enhancements across diverse sectors, such as education, healthcare, and entertainment, where model developers can align agent responses to different user requirements thereby broadening the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18416v2-abstract-full').style.display = 'inline'; document.getElementById('2407.18416v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18416v2-abstract-full" style="display: none;"> Persona agents, which are LLM agents that act according to an assigned persona, have demonstrated impressive contextual response capabilities across various applications. These persona agents offer significant enhancements across diverse sectors, such as education, healthcare, and entertainment, where model developers can align agent responses to different user requirements thereby broadening the scope of agent applications. However, evaluating persona agent performance is incredibly challenging due to the complexity of assessing persona adherence in free-form interactions across various environments that are relevant to each persona agent. We introduce PersonaGym, the first dynamic evaluation framework for assessing persona agents, and PersonaScore, the first automated human-aligned metric grounded in decision theory for comprehensive large-scale evaluation of persona agents. Our evaluation of 6 open and closed-source LLMs, using a benchmark encompassing 200 personas and 10,000 questions, reveals significant opportunities for advancement in persona agent capabilities across state-of-the-art models. 
For example, Claude 3.5 Sonnet only has a 2.97% relative improvement in PersonaScore than GPT 3.5 despite being a much more advanced model. Importantly, we find that increased model size and complexity do not necessarily imply enhanced persona agent capabilities thereby highlighting the pressing need for algorithmic and architectural invention towards faithful and performant persona agents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18416v2-abstract-full').style.display = 'none'; document.getElementById('2407.18416v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19150">arXiv:2406.19150</a> <span> [<a href="https://arxiv.org/pdf/2406.19150">pdf</a>, <a href="https://arxiv.org/format/2406.19150">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> RAVEN: Multitask Retrieval Augmented Vision-Language Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Rao%2C+V+N">Varun Nagaraj Rao</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+S">Siddharth Choudhary</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Aditya Deshpande</a>, <a href="/search/cs?searchtype=author&query=Satzoda%2C+R+K">Ravi Kumar Satzoda</a>, <a href="/search/cs?searchtype=author&query=Appalaraju%2C+S">Srikar Appalaraju</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19150v1-abstract-short" style="display: inline;"> The scaling of large language models to encode all the world's knowledge in model parameters is unsustainable and has exacerbated resource barriers. Retrieval-Augmented Generation (RAG) presents a potential solution, yet its application to vision-language models (VLMs) is under explored. Existing methods focus on models designed for single tasks. Furthermore, they're limited by the need for resour… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19150v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19150v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19150v1-abstract-full" style="display: none;"> The scaling of large language models to encode all the world's knowledge in model parameters is unsustainable and has exacerbated resource barriers. Retrieval-Augmented Generation (RAG) presents a potential solution, yet its application to vision-language models (VLMs) is under explored. Existing methods focus on models designed for single tasks. Furthermore, they're limited by the need for resource intensive pre training, additional parameter requirements, unaddressed modality prioritization and lack of clear benefit over non-retrieval baselines. 
This paper introduces RAVEN, a multitask retrieval augmented VLM framework that enhances base VLMs through efficient, task specific fine-tuning. By integrating retrieval augmented samples without the need for additional retrieval-specific parameters, we show that the model acquires retrieval properties that are effective across multiple tasks. Our results and extensive ablations across retrieved modalities for the image captioning and VQA tasks indicate significant performance improvements compared to non retrieved baselines +1 CIDEr on MSCOCO, +4 CIDEr on NoCaps and nearly a +3\% accuracy on specific VQA question types. This underscores the efficacy of applying RAG approaches to VLMs, marking a stride toward more efficient and accessible multimodal learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19150v1-abstract-full').style.display = 'none'; document.getElementById('2406.19150v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.03142">arXiv:2406.03142</a> <span> [<a href="https://arxiv.org/pdf/2406.03142">pdf</a>, <a href="https://arxiv.org/ps/2406.03142">ps</a>, <a href="https://arxiv.org/format/2406.03142">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Power of Randomization in Fair Classification and Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agarwal%2C+S">Sushant Agarwal</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.03142v2-abstract-short" style="display: inline;"> Fair classification and fair representation learning are two important problems in supervised and unsupervised fair machine learning, respectively. Fair classification asks for a classifier that maximizes accuracy on a given data distribution subject to fairness constraints. Fair representation maps a given data distribution over the original feature space to a distribution over a new representati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03142v2-abstract-full').style.display = 'inline'; document.getElementById('2406.03142v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.03142v2-abstract-full" style="display: none;"> Fair classification and fair representation learning are two important problems in supervised and unsupervised fair machine learning, respectively. 
Fair classification asks for a classifier that maximizes accuracy on a given data distribution subject to fairness constraints. Fair representation maps a given data distribution over the original feature space to a distribution over a new representation space such that all classifiers over the representation satisfy fairness. In this paper, we examine the power of randomization in both these problems to minimize the loss of accuracy that results when we impose fairness constraints. Previous work on fair classification has characterized the optimal fair classifiers on a given data distribution that maximize accuracy subject to fairness constraints, e.g., Demographic Parity (DP), Equal Opportunity (EO), and Predictive Equality (PE). We refine these characterizations to demonstrate when the optimal randomized fair classifiers can surpass their deterministic counterparts in accuracy. We also show how the optimal randomized fair classifier that we characterize can be obtained as a solution to a convex optimization problem. Recent work has provided techniques to construct fair representations for a given data distribution such that any classifier over this representation satisfies DP. However, the classifiers on these fair representations either come with no or weak accuracy guarantees when compared to the optimal fair classifier on the original data distribution. Extending our ideas for randomized fair classification, we improve on these works, and construct DP-fair, EO-fair, and PE-fair representations that have provably optimal accuracy and suffer no accuracy loss compared to the optimal DP-fair, EO-fair, and PE-fair classifiers respectively on the original data distribution. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03142v2-abstract-full').style.display = 'none'; document.getElementById('2406.03142v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Appeared in ACM FAccT 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19307">arXiv:2405.19307</a> <span> [<a href="https://arxiv.org/pdf/2405.19307">pdf</a>, <a href="https://arxiv.org/format/2405.19307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Data Efficient Behavior Cloning for Fine Manipulation via Continuity-based Corrective Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Abhay Deshpande</a>, <a href="/search/cs?searchtype=author&query=Ke%2C+L">Liyiming Ke</a>, <a href="/search/cs?searchtype=author&query=Pfeifer%2C+Q">Quinn Pfeifer</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Abhishek Gupta</a>, <a href="/search/cs?searchtype=author&query=Srinivasa%2C+S+S">Siddhartha S. 
Srinivasa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19307v3-abstract-short" style="display: inline;"> We consider imitation learning with access only to expert demonstrations, whose real-world application is often limited by covariate shift due to compounding errors during execution. We investigate the effectiveness of the Continuity-based Corrective Labels for Imitation Learning (CCIL) framework in mitigating this issue for real-world fine manipulation tasks. CCIL generates corrective labels by l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19307v3-abstract-full').style.display = 'inline'; document.getElementById('2405.19307v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19307v3-abstract-full" style="display: none;"> We consider imitation learning with access only to expert demonstrations, whose real-world application is often limited by covariate shift due to compounding errors during execution. We investigate the effectiveness of the Continuity-based Corrective Labels for Imitation Learning (CCIL) framework in mitigating this issue for real-world fine manipulation tasks. CCIL generates corrective labels by learning a locally continuous dynamics model from demonstrations to guide the agent back toward expert states. Through extensive experiments on peg insertion and fine grasping, we provide the first empirical validation that CCIL can significantly improve imitation learning performance despite discontinuities present in contact-rich manipulation. We find that: (1) real-world manipulation exhibits sufficient local smoothness to apply CCIL, (2) generated corrective labels are most beneficial in low-data regimes, and (3) label filtering based on estimated dynamics model error enables performance gains. 
To effectively apply CCIL to robotic domains, we offer a practical instantiation of the framework and insights into design choices and hyperparameter selection. Our work demonstrates CCIL's practicality for alleviating compounding errors in imitation learning on physical robots. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19307v3-abstract-full').style.display = 'none'; document.getElementById('2405.19307v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at IROS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04325">arXiv:2405.04325</a> <span> [<a href="https://arxiv.org/pdf/2405.04325">pdf</a>, <a href="https://arxiv.org/format/2405.04325">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Deception in Reinforced Autonomous Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dogra%2C+A">Atharvan Dogra</a>, <a href="/search/cs?searchtype=author&query=Pillutla%2C+K">Krishna Pillutla</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Sai%2C+A+B">Ananya B Sai</a>, <a href="/search/cs?searchtype=author&query=Nay%2C+J">John Nay</a>, <a 
href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Ravindran%2C+B">Balaraman Ravindran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04325v2-abstract-short" style="display: inline;"> We explore the ability of large language model (LLM)-based agents to engage in subtle deception such as strategically phrasing and intentionally manipulating information to misguide and deceive other agents. This harmful behavior can be hard to detect, unlike blatant lying or unintentional hallucination. We build an adversarial testbed mimicking a legislative environment where two LLMs play opposi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04325v2-abstract-full').style.display = 'inline'; document.getElementById('2405.04325v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04325v2-abstract-full" style="display: none;"> We explore the ability of large language model (LLM)-based agents to engage in subtle deception such as strategically phrasing and intentionally manipulating information to misguide and deceive other agents. This harmful behavior can be hard to detect, unlike blatant lying or unintentional hallucination. We build an adversarial testbed mimicking a legislative environment where two LLMs play opposing roles: a corporate *lobbyist* proposing amendments to bills that benefit a specific company while evading a *critic* trying to detect this deception. We use real-world legislative bills matched with potentially affected companies to ground these interactions. 
Our results show that LLM lobbyists initially exhibit limited deception against strong LLM critics which can be further improved through simple verbal reinforcement, significantly enhancing their deceptive capabilities, and increasing deception rates by up to 40 points. This highlights the risk of autonomous agents manipulating other agents through seemingly neutral language to attain self-serving goals. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04325v2-abstract-full').style.display = 'none'; document.getElementById('2405.04325v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01573">arXiv:2405.01573</a> <span> [<a href="https://arxiv.org/pdf/2405.01573">pdf</a>, <a href="https://arxiv.org/format/2405.01573">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Class-Level Code Generation from Natural Language Using Iterative, Tool-Enhanced Reasoning over Repository </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ajinkya Deshpande</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Anmol Agarwal</a>, <a href="/search/cs?searchtype=author&query=Shet%2C+S">Shashank Shet</a>, <a 
href="/search/cs?searchtype=author&query=Iyer%2C+A">Arun Iyer</a>, <a href="/search/cs?searchtype=author&query=Kanade%2C+A">Aditya Kanade</a>, <a href="/search/cs?searchtype=author&query=Bairi%2C+R">Ramakrishna Bairi</a>, <a href="/search/cs?searchtype=author&query=Parthasarathy%2C+S">Suresh Parthasarathy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01573v2-abstract-short" style="display: inline;"> LLMs have demonstrated significant potential in code generation tasks, achieving promising results at the function or statement level across various benchmarks. However, the complexities associated with creating code artifacts like classes, particularly within the context of real-world software repositories, remain underexplored. Prior research treats class-level generation as an isolated task, ne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01573v2-abstract-full').style.display = 'inline'; document.getElementById('2405.01573v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01573v2-abstract-full" style="display: none;"> LLMs have demonstrated significant potential in code generation tasks, achieving promising results at the function or statement level across various benchmarks. However, the complexities associated with creating code artifacts like classes, particularly within the context of real-world software repositories, remain underexplored. Prior research treats class-level generation as an isolated task, neglecting the intricate dependencies & interactions that characterize real-world software environments. To address this gap, we introduce RepoClassBench, a comprehensive benchmark designed to rigorously evaluate LLMs in generating complex, class-level code within real-world repositories. 
RepoClassBench includes "Natural Language to Class generation" tasks across Java, Python & C# from a selection of repositories. We ensure that each class in our dataset not only has cross-file dependencies within the repository but also includes corresponding test cases to verify its functionality. We find that current models struggle with the realistic challenges posed by our benchmark, primarily due to their limited exposure to relevant repository contexts. To address this shortcoming, we introduce Retrieve-Repotools-Reflect (RRR), a novel approach that equips LLMs with static analysis tools to iteratively navigate & reason about repository-level context in an agent-based framework. Our experiments demonstrate that RRR significantly outperforms existing baselines on RepoClassBench, showcasing its effectiveness across programming languages & under various settings. Our findings emphasize the critical need for code-generation benchmarks to incorporate repo-level dependencies to more accurately reflect the complexities of software development. Our work shows the benefits of leveraging specialized tools to enhance LLMs' understanding of repository context. We plan to make our dataset & evaluation harness public. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01573v2-abstract-full').style.display = 'none'; document.getElementById('2405.01573v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint with additional experiments</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08555">arXiv:2404.08555</a> <span> [<a href="https://arxiv.org/pdf/2404.08555">pdf</a>, <a href="https://arxiv.org/format/2404.08555">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RLHF Deciphered: A Critical Analysis of Reinforcement Learning from Human Feedback for LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chaudhari%2C+S">Shreyas Chaudhari</a>, <a href="/search/cs?searchtype=author&query=Aggarwal%2C+P">Pranjal Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=da+Silva%2C+B+C">Bruno Castro da Silva</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08555v2-abstract-short" style="display: inline;"> State-of-the-art large language models (LLMs) have become 
indispensable tools for various tasks. However, training LLMs to serve as effective assistants for humans requires careful consideration. A promising approach is reinforcement learning from human feedback (RLHF), which leverages human feedback to update the model in accordance with human preferences and mitigate issues like toxicity and hal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08555v2-abstract-full').style.display = 'inline'; document.getElementById('2404.08555v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08555v2-abstract-full" style="display: none;"> State-of-the-art large language models (LLMs) have become indispensable tools for various tasks. However, training LLMs to serve as effective assistants for humans requires careful consideration. A promising approach is reinforcement learning from human feedback (RLHF), which leverages human feedback to update the model in accordance with human preferences and mitigate issues like toxicity and hallucinations. Yet, an understanding of RLHF for LLMs is largely entangled with initial design choices that popularized the method and current research focuses on augmenting those choices rather than fundamentally improving the framework. In this paper, we analyze RLHF through the lens of reinforcement learning principles to develop an understanding of its fundamentals, dedicating substantial focus to the core component of RLHF -- the reward model. Our study investigates modeling choices, caveats of function approximation, and their implications on RLHF training algorithms, highlighting the underlying assumptions made about the expressivity of reward. Our analysis improves the understanding of the role of reward models and methods for their training, concurrently revealing limitations of the current methodology. 
We characterize these limitations, including incorrect generalization, model misspecification, and the sparsity of feedback, along with their impact on the performance of a language model. The discussion and analysis are substantiated by a categorical review of current literature, serving as a reference for researchers and practitioners to understand the challenges of RLHF and build upon existing efforts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08555v2-abstract-full').style.display = 'none'; document.getElementById('2404.08555v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05749">arXiv:2403.05749</a> <span> [<a href="https://arxiv.org/pdf/2403.05749">pdf</a>, <a href="https://arxiv.org/format/2403.05749">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> </div> </div> <p class="title is-5 mathjax"> Characterizing Flow Complexity in Transportation Networks using Graph Homology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+S+A">Shashank A Deshpande</a>, <a href="/search/cs?searchtype=author&query=Balakrishnan%2C+H">Hamsa Balakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2403.05749v1-abstract-short" style="display: inline;"> Series-parallel network topologies generally exhibit simplified dynamical behavior and avoid high combinatorial complexity. A comprehensive analysis of how flow complexity emerges with a graph's deviation from series-parallel topology is therefore of fundamental interest. We introduce the notion of a robust $k$-path on a directed acyclic graph, with increasing values of the length $k$ reflecting… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05749v1-abstract-full').style.display = 'inline'; document.getElementById('2403.05749v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05749v1-abstract-full" style="display: none;"> Series-parallel network topologies generally exhibit simplified dynamical behavior and avoid high combinatorial complexity. A comprehensive analysis of how flow complexity emerges with a graph's deviation from series-parallel topology is therefore of fundamental interest. We introduce the notion of a robust $k$-path on a directed acyclic graph, with increasing values of the length $k$ reflecting increasing deviations. We propose a graph homology with robust $k$-paths as the bases of its chain spaces. In this framework, the topological simplicity of series-parallel graphs translates into a triviality of higher-order chain spaces. We discuss a correspondence between the space of order-three chains and sites within the network that are susceptible to the Braess paradox, a well-known phenomenon in transportation networks. In this manner, we illustrate the utility of the proposed graph homology in systematically studying the complexity of flow networks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05749v1-abstract-full').style.display = 'none'; document.getElementById('2403.05749v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 3 figures, letter</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11741">arXiv:2402.11741</a> <span> [<a href="https://arxiv.org/pdf/2402.11741">pdf</a>, <a href="https://arxiv.org/format/2402.11741">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> To Store or Not to Store: a graph theoretical approach for Dataset Versioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+A">Anxin Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jingwei Li</a>, <a href="/search/cs?searchtype=author&query=Sukprasert%2C+P">Pattara Sukprasert</a>, <a href="/search/cs?searchtype=author&query=Khuller%2C+S">Samir Khuller</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amol Deshpande</a>, <a 
href="/search/cs?searchtype=author&query=Mukherjee%2C+K">Koyel Mukherjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11741v1-abstract-short" style="display: inline;"> In this work, we study the cost efficient data versioning problem, where the goal is to optimize the storage and reconstruction (retrieval) costs of data versions, given a graph of datasets as nodes and edges capturing edit/delta information. One central variant we study is MinSum Retrieval (MSR) where the goal is to minimize the total retrieval costs, while keeping the storage costs bounded. This… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11741v1-abstract-full').style.display = 'inline'; document.getElementById('2402.11741v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11741v1-abstract-full" style="display: none;"> In this work, we study the cost efficient data versioning problem, where the goal is to optimize the storage and reconstruction (retrieval) costs of data versions, given a graph of datasets as nodes and edges capturing edit/delta information. One central variant we study is MinSum Retrieval (MSR) where the goal is to minimize the total retrieval costs, while keeping the storage costs bounded. This problem (along with its variants) was introduced by Bhattacherjee et al. [VLDB'15]. While such problems are frequently encountered in collaborative tools (e.g., version control systems and data analysis pipelines), to the best of our knowledge, no existing research studies the theoretical aspects of these problems. We establish that the currently best-known heuristic, LMG, can perform arbitrarily badly in a simple worst case. 
Moreover, we show that it is hard to get $o(n)$-approximation for MSR on general graphs even if we relax the storage constraints by an $O(\log n)$ factor. Similar hardness results are shown for other variants. Meanwhile, we propose poly-time approximation schemes for tree-like graphs, motivated by the fact that the graphs arising in practice from typical edit operations are often not arbitrary. As version graphs typically have low treewidth, we further develop new algorithms for bounded treewidth graphs. Furthermore, we propose two new heuristics and evaluate them empirically. First, we extend LMG by considering more potential “moves”, to propose a new heuristic LMG-All. LMG-All consistently outperforms LMG while having comparable run time on a wide variety of datasets, i.e., version graphs. Secondly, we apply our tree algorithms on the minimum-storage arborescence of an instance, yielding algorithms that are qualitatively better than all previous heuristics for MSR, as well as for another variant BoundedMin Retrieval (BMR). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11741v1-abstract-full').style.display = 'none'; document.getElementById('2402.11741v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IPDPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.06733">arXiv:2402.06733</a> <span> [<a href="https://arxiv.org/pdf/2402.06733">pdf</a>, <a href="https://arxiv.org/format/2402.06733">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NICE: To Optimize In-Context Examples or Not? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Srivastava%2C+P">Pragya Srivastava</a>, <a href="/search/cs?searchtype=author&query=Golechha%2C+S">Satvik Golechha</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.06733v3-abstract-short" style="display: inline;"> Recent work shows that in-context learning and optimization of in-context examples (ICE) can significantly improve the accuracy of large language models (LLMs) on a wide range of tasks, leading to an apparent consensus that ICE optimization is crucial for better performance. However, most of these studies assume a fixed or no instruction provided in the prompt. 
We challenge this consensus by inves… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.06733v3-abstract-full').style.display = 'inline'; document.getElementById('2402.06733v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.06733v3-abstract-full" style="display: none;"> Recent work shows that in-context learning and optimization of in-context examples (ICE) can significantly improve the accuracy of large language models (LLMs) on a wide range of tasks, leading to an apparent consensus that ICE optimization is crucial for better performance. However, most of these studies assume a fixed or no instruction provided in the prompt. We challenge this consensus by investigating the necessity of optimizing ICE when task-specific instructions are provided and find that there are many tasks for which it yields diminishing returns. In particular, using a diverse set of tasks and a systematically created instruction set with gradually added details, we find that as the prompt instruction becomes more detailed, the returns on ICE optimization diminish. To characterize this behavior, we introduce a task-specific metric called Normalized Invariability to Choice of Examples (NICE) that quantifies the learnability of tasks from a given instruction, and provides a heuristic to help decide whether to optimize instructions or ICE for a new task. Given a task, the proposed metric can reliably predict the utility of optimizing ICE compared to using random ICE. Our code is available at https://github.com/microsoft/nice-icl. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.06733v3-abstract-full').style.display = 'none'; document.getElementById('2402.06733v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a full paper (9 pages) at ACL 2024 (Main)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics 2024 (Volume 1: Long Papers) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.16735">arXiv:2312.16735</a> <span> [<a href="https://arxiv.org/pdf/2312.16735">pdf</a>, <a href="https://arxiv.org/format/2312.16735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Flock: A Low-Cost Streaming Query Engine on FaaS Platforms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liao%2C+G">Gang Liao</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amol Deshpande</a>, <a href="/search/cs?searchtype=author&query=Abadi%2C+D+J">Daniel J. 
Abadi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.16735v4-abstract-short" style="display: inline;"> Existing serverless data analytics systems rely on external storage services like S3 for data shuffling and communication between cloud functions. While this approach provides the elasticity benefits of serverless computing, it incurs additional latency and cost overheads. We present Flock, a novel cloud-native streaming query engine that leverages the on-demand scalability of FaaS platforms for r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.16735v4-abstract-full').style.display = 'inline'; document.getElementById('2312.16735v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.16735v4-abstract-full" style="display: none;"> Existing serverless data analytics systems rely on external storage services like S3 for data shuffling and communication between cloud functions. While this approach provides the elasticity benefits of serverless computing, it incurs additional latency and cost overheads. We present Flock, a novel cloud-native streaming query engine that leverages the on-demand scalability of FaaS platforms for real-time data analytics. Flock utilizes function invocation payloads for efficient data exchange, eliminating the need for external storage. This not only reduces latency and cost but also simplifies the architecture by removing the requirement for a centralized coordinator. Flock employs a template-based approach to dynamically create cloud functions for each query stage and a function group mechanism for handling data aggregation and shuffling. It supports both SQL and DataFrame APIs, making it easy to use. 
Our evaluation shows that Flock provides significant performance gains and cost savings compared to existing serverless and serverful streaming systems. It outperforms Apache Flink by 10-20x in cost while achieving similar latency and throughput. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.16735v4-abstract-full').style.display = 'none'; document.getElementById('2312.16735v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10534">arXiv:2312.10534</a> <span> [<a href="https://arxiv.org/pdf/2312.10534">pdf</a>, <a href="https://arxiv.org/format/2312.10534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Robustness of Model Attributions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kamath%2C+S">Sandesh Kamath</a>, <a href="/search/cs?searchtype=author&query=Mittal%2C+S">Sankalp Mittal</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Balasubramanian%2C+V+N">Vineeth N Balasubramanian</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10534v1-abstract-short" style="display: inline;"> For machine learning models to be reliable and trustworthy, their decisions must be interpretable. As these models find increasing use in safety-critical applications, it is important that not just the model predictions but also their explanations (as feature attributions) be robust to small human-imperceptible input perturbations. Recent works have shown that many attribution methods are fragile… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10534v1-abstract-full').style.display = 'inline'; document.getElementById('2312.10534v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10534v1-abstract-full" style="display: none;"> For machine learning models to be reliable and trustworthy, their decisions must be interpretable. As these models find increasing use in safety-critical applications, it is important that not just the model predictions but also their explanations (as feature attributions) be robust to small human-imperceptible input perturbations. Recent works have shown that many attribution methods are fragile and have proposed improvements in either these methods or the model training. We observe two main causes for fragile attributions: first, the existing metrics of robustness (e.g., top-k intersection) over-penalize even reasonable local shifts in attribution, thereby making random perturbations to appear as a strong attack, and second, the attribution can be concentrated in a small region even when there are multiple important parts in an image. To rectify this, we propose simple ways to strengthen existing metrics and attribution methods that incorporate locality of pixels in robustness metrics and diversity of pixel locations in attributions. 
Towards the role of model training in attributional robustness, we empirically observe that adversarially trained models have more robust attributions on smaller datasets, however, this advantage disappears in larger datasets. Code is available at https://github.com/ksandeshk/LENS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10534v1-abstract-full').style.display = 'none'; document.getElementById('2312.10534v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted AAAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10396">arXiv:2312.10396</a> <span> [<a href="https://arxiv.org/pdf/2312.10396">pdf</a>, <a href="https://arxiv.org/ps/2312.10396">ps</a>, <a href="https://arxiv.org/format/2312.10396">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> How Far Can Fairness Constraints Help Recover From Biased Data? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+M">Mohit Sharma</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10396v4-abstract-short" style="display: inline;"> A general belief in fair classification is that fairness constraints incur a trade-off with accuracy, which biased data may worsen. Contrary to this belief, Blum & Stangl (2019) show that fair classification with equal opportunity constraints even on extremely biased data can recover optimally accurate and fair classifiers on the original data distribution. Their result is interesting because it d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10396v4-abstract-full').style.display = 'inline'; document.getElementById('2312.10396v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10396v4-abstract-full" style="display: none;"> A general belief in fair classification is that fairness constraints incur a trade-off with accuracy, which biased data may worsen. Contrary to this belief, Blum & Stangl (2019) show that fair classification with equal opportunity constraints even on extremely biased data can recover optimally accurate and fair classifiers on the original data distribution. Their result is interesting because it demonstrates that fairness constraints can implicitly rectify data bias and simultaneously overcome a perceived fairness-accuracy trade-off. Their data bias model simulates under-representation and label bias in underprivileged population, and they show the above result on a stylized data distribution with i.i.d. label noise, under simple conditions on the data distribution and bias parameters. 
We propose a general approach to extend the result of Blum & Stangl (2019) to different fairness constraints, data bias models, data distributions, and hypothesis classes. We strengthen their result, and extend it to the case when their stylized distribution has labels with Massart noise instead of i.i.d. noise. We prove a similar recovery result for arbitrary data distributions using fair reject option classifiers. We further generalize it to arbitrary data distributions and arbitrary hypothesis classes, i.e., we prove that for any data distribution, if the optimally accurate classifier in a given hypothesis class is fair and robust, then it can be recovered through fair classification with equal opportunity constraints on the biased distribution whenever the bias parameters satisfy certain simple conditions. Finally, we show applications of our technique to time-varying data bias in classification and fair machine learning pipelines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10396v4-abstract-full').style.display = 'none'; document.getElementById('2312.10396v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.05323">arXiv:2312.05323</a> <span> [<a href="https://arxiv.org/pdf/2312.05323">pdf</a>, <a href="https://arxiv.org/format/2312.05323">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> BaRiFlex: A Robotic Gripper with Versatility and Collision Robustness for Robot Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jeong%2C+G">Gu-Cheol Jeong</a>, <a href="/search/cs?searchtype=author&query=Bahety%2C+A">Arpit Bahety</a>, <a href="/search/cs?searchtype=author&query=Pedraza%2C+G">Gabriel Pedraza</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+D">Ashish D. Deshpande</a>, <a href="/search/cs?searchtype=author&query=Mart%C3%ADn-Mart%C3%ADn%2C+R">Roberto Martín-Martín</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.05323v1-abstract-short" style="display: inline;"> We present a new approach to robot hand design specifically suited for successfully implementing robot learning methods to accomplish tasks in daily human environments. 
We introduce BaRiFlex, an innovative gripper design that alleviates the issues caused by unexpected contact and collisions during robot learning, offering robustness, grasping versatility, task versatility, and simplicity to the le… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05323v1-abstract-full').style.display = 'inline'; document.getElementById('2312.05323v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.05323v1-abstract-full" style="display: none;"> We present a new approach to robot hand design specifically suited for successfully implementing robot learning methods to accomplish tasks in daily human environments. We introduce BaRiFlex, an innovative gripper design that alleviates the issues caused by unexpected contact and collisions during robot learning, offering robustness, grasping versatility, task versatility, and simplicity to the learning processes. This achievement is enabled by the incorporation of low-inertia actuators, providing high Back-drivability, and the strategic combination of Rigid and Flexible materials which enhances versatility and the gripper's resilience against unpredicted collisions. Furthermore, the integration of flexible Fin-Ray linkages and rigid linkages allows the gripper to execute compliant grasping and precise pinching. We conducted rigorous performance tests to characterize the novel gripper's compliance, durability, grasping and task versatility, and precision. We also integrated the BaRiFlex with a 7 Degree of Freedom (DoF) Franka Emika's Panda robotic arm to evaluate its capacity to support a trial-and-error (reinforcement learning) training procedure. The results of our experimental study are then compared to those obtained using the original rigid Franka Hand and a reference Fin-Ray soft gripper, demonstrating the superior capabilities and advantages of our developed gripper system. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05323v1-abstract-full').style.display = 'none'; document.getElementById('2312.05323v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 6 figures, project website: https://robin-lab.cs.utexas.edu/bariflex/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04294">arXiv:2312.04294</a> <span> [<a href="https://arxiv.org/pdf/2312.04294">pdf</a>, <a href="https://arxiv.org/ps/2312.04294">ps</a>, <a href="https://arxiv.org/format/2312.04294">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Energy-Efficient Internet of Things Monitoring with Content-Based Wake-Up Radio </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+A">Anay Ajit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Chiariotti%2C+F">Federico Chiariotti</a>, <a href="/search/cs?searchtype=author&query=Zanella%2C+A">Andrea Zanella</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04294v1-abstract-short" style="display: inline;"> The use of Wake-Up Radio (WUR) in Internet of Things (IoT) networks can significantly improve their energy efficiency: 
battery-powered sensors can remain in a low-power (sleep) mode while listening for wake-up messages using their WUR and reactivate only when polled. However, polling-based WUR may still lead to wasted energy if values sensed by the polled sensors provide no new information to the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04294v1-abstract-full').style.display = 'inline'; document.getElementById('2312.04294v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04294v1-abstract-full" style="display: none;"> The use of Wake-Up Radio (WUR) in Internet of Things (IoT) networks can significantly improve their energy efficiency: battery-powered sensors can remain in a low-power (sleep) mode while listening for wake-up messages using their WUR and reactivate only when polled. However, polling-based WUR may still lead to wasted energy if values sensed by the polled sensors provide no new information to the receiver, or in general have a low Value of Information (VoI). In this paper, we design a content-based WUR that tracks the process observed by the sensors and only wakes up the sensor if its estimated update's VoI is higher than a threshold communicated through the poll. If the sensor does not reply to the polling request, the Gateway (GW) can make a Bayesian update, knowing that either the sensor value substantially confirms its current estimate or the transmission failed due to the wireless channel. We analyze the trade-off between the tracking error and the battery lifetime of the sensors, showing that content-based WUR can provide fine-grained control of this trade-off and significantly increase the battery lifetime of the node with a minimal Mean Squared Error (MSE) increase. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04294v1-abstract-full').style.display = 'none'; document.getElementById('2312.04294v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.00348">arXiv:2312.00348</a> <span> [<a href="https://arxiv.org/pdf/2312.00348">pdf</a>, <a href="https://arxiv.org/format/2312.00348">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Student Activity Recognition in Classroom Environments using Transfer Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Anagha Deshpande</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+V">Vedant Deshpande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.00348v1-abstract-short" style="display: inline;"> The recent advances in artificial intelligence and deep learning facilitate automation in various applications including home automation, smart surveillance systems, and healthcare among others. Human Activity Recognition is one of its emerging applications, which can be implemented in a classroom environment to enhance safety, efficiency, and overall educational quality. 
This paper proposes a sys… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00348v1-abstract-full').style.display = 'inline'; document.getElementById('2312.00348v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.00348v1-abstract-full" style="display: none;"> The recent advances in artificial intelligence and deep learning facilitate automation in various applications including home automation, smart surveillance systems, and healthcare among others. Human Activity Recognition is one of its emerging applications, which can be implemented in a classroom environment to enhance safety, efficiency, and overall educational quality. This paper proposes a system for detecting and recognizing the activities of students in a classroom environment. The dataset has been structured and recorded by the authors since a standard dataset for this task was not available at the time of this study. Transfer learning, a widely adopted method within the field of deep learning, has proven to be helpful in complex tasks like image and video processing. Pretrained models including VGG-16, ResNet-50, InceptionV3, and Xception are used for feature extraction and classification tasks. Xception achieved an accuracy of 93%, on the novel classroom dataset, outperforming the other three models in consideration. The system proposed in this study aims to introduce a safer and more productive learning environment for students and educators. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00348v1-abstract-full').style.display = 'none'; document.getElementById('2312.00348v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 12 figures, accepted at the IEEE International Conference on Computational Intelligence, Networks and Security (ICCINS) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.09735">arXiv:2311.09735</a> <span> [<a href="https://arxiv.org/pdf/2311.09735">pdf</a>, <a href="https://arxiv.org/format/2311.09735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> GEO: Generative Engine Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Aggarwal%2C+P">Pranjal Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.09735v3-abstract-short" style="display: inline;"> The advent of large language models (LLMs) has ushered in a new paradigm of search engines that use generative models to gather and summarize information to answer user queries. This emerging technology, which we formalize under the unified framework of generative engines (GEs), can generate accurate and personalized responses, rapidly replacing traditional search engines like Google and Bing. Gen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09735v3-abstract-full').style.display = 'inline'; document.getElementById('2311.09735v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.09735v3-abstract-full" style="display: none;"> The advent of large language models (LLMs) has ushered in a new paradigm of search engines that use generative models to gather and summarize information to answer user queries. This emerging technology, which we formalize under the unified framework of generative engines (GEs), can generate accurate and personalized responses, rapidly replacing traditional search engines like Google and Bing. Generative Engines typically satisfy queries by synthesizing information from multiple sources and summarizing them using LLMs. While this shift significantly improves $\textit{user}$ utility and $\textit{generative search engine}$ traffic, it poses a huge challenge for the third stakeholder -- website and content creators. Given the black-box and fast-moving nature of generative engines, content creators have little to no control over $\textit{when}$ and $\textit{how}$ their content is displayed. With generative engines here to stay, we must ensure the creator economy is not disadvantaged. 
To address this, we introduce Generative Engine Optimization (GEO), the first novel paradigm to aid content creators in improving their content visibility in generative engine responses through a flexible black-box optimization framework for optimizing and defining visibility metrics. We facilitate systematic evaluation by introducing GEO-bench, a large-scale benchmark of diverse user queries across multiple domains, along with relevant web sources to answer these queries. Through rigorous evaluation, we demonstrate that GEO can boost visibility by up to $40\%$ in generative engine responses. Moreover, we show the efficacy of these strategies varies across domains, underscoring the need for domain-specific optimization methods. Our work opens a new frontier in information discovery systems, with profound implications for both developers of generative engines and content creators. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09735v3-abstract-full').style.display = 'none'; document.getElementById('2311.09735v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to KDD 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.04892">arXiv:2311.04892</a> <span> [<a href="https://arxiv.org/pdf/2311.04892">pdf</a>, <a href="https://arxiv.org/format/2311.04892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Bias Runs Deep: Implicit Reasoning Biases in Persona-Assigned LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+S">Shashank Gupta</a>, <a href="/search/cs?searchtype=author&query=Shrivastava%2C+V">Vaishnavi Shrivastava</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+P">Peter Clark</a>, <a href="/search/cs?searchtype=author&query=Sabharwal%2C+A">Ashish Sabharwal</a>, <a href="/search/cs?searchtype=author&query=Khot%2C+T">Tushar Khot</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.04892v2-abstract-short" style="display: inline;"> Recent works have showcased the ability of LLMs to embody diverse personas in their responses, exemplified by prompts like 'You are Yoda. Explain the Theory of Relativity.' While this ability allows personalization of LLMs and enables human behavior simulation, its effect on LLMs' capabilities remains unclear. 
To fill this gap, we present the first extensive study of the unintended side-effects of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04892v2-abstract-full').style.display = 'inline'; document.getElementById('2311.04892v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.04892v2-abstract-full" style="display: none;"> Recent works have showcased the ability of LLMs to embody diverse personas in their responses, exemplified by prompts like 'You are Yoda. Explain the Theory of Relativity.' While this ability allows personalization of LLMs and enables human behavior simulation, its effect on LLMs' capabilities remains unclear. To fill this gap, we present the first extensive study of the unintended side-effects of persona assignment on the ability of LLMs to perform basic reasoning tasks. Our study covers 24 reasoning datasets, 4 LLMs, and 19 diverse personas (e.g. an Asian person) spanning 5 socio-demographic groups. Our experiments unveil that LLMs harbor deep rooted bias against various socio-demographics underneath a veneer of fairness. While they overtly reject stereotypes when explicitly asked ('Are Black people less skilled at mathematics?'), they manifest stereotypical and erroneous presumptions when asked to answer questions while adopting a persona. These can be observed as abstentions in responses, e.g., 'As a Black person, I can't answer this question as it requires math knowledge', and generally result in a substantial performance drop. Our experiments with ChatGPT-3.5 show that this bias is ubiquitous - 80% of our personas demonstrate bias; it is significant - some datasets show performance drops of 70%+; and can be especially harmful for certain groups - some personas suffer statistically significant drops on 80%+ of the datasets. 
Overall, all 4 LLMs exhibit this bias to varying extents, with GPT-4-Turbo showing the least but still a problematic amount of bias (evident in 42% of the personas). Further analysis shows that these persona-induced errors can be hard-to-discern and hard-to-avoid. Our findings serve as a cautionary tale that the practice of assigning personas to LLMs - a trend on the rise - can surface their deep-rooted biases and have unforeseeable and detrimental side-effects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04892v2-abstract-full').style.display = 'none'; document.getElementById('2311.04892v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://allenai.github.io/persona-bias. Paper to appear at ICLR 2024. 
Added results for other LLMs in v2 (similar findings)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.02807">arXiv:2311.02807</a> <span> [<a href="https://arxiv.org/pdf/2311.02807">pdf</a>, <a href="https://arxiv.org/format/2311.02807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> QualEval: Qualitative Evaluation for Model Improvement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+P">Peter Clark</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Sabharwal%2C+A">Ashish Sabharwal</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.02807v2-abstract-short" style="display: inline;"> Quantitative evaluation metrics have traditionally been pivotal in gauging the advancements of artificial intelligence systems, including large language models (LLMs). However, these metrics have inherent limitations. 
Given the intricate nature of real-world tasks, a single scalar to quantify and compare is insufficient to capture the fine-grained nuances of model behavior. Metrics serve only as a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02807v2-abstract-full').style.display = 'inline'; document.getElementById('2311.02807v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.02807v2-abstract-full" style="display: none;"> Quantitative evaluation metrics have traditionally been pivotal in gauging the advancements of artificial intelligence systems, including large language models (LLMs). However, these metrics have inherent limitations. Given the intricate nature of real-world tasks, a single scalar to quantify and compare is insufficient to capture the fine-grained nuances of model behavior. Metrics serve only as a way to compare and benchmark models, and do not yield actionable diagnostics, thus making the model improvement process challenging. Model developers find themselves amid extensive manual efforts involving sifting through vast datasets and attempting hit-or-miss adjustments to training data or setups. In this work, we address the shortcomings of quantitative metrics by proposing QualEval, which augments quantitative scalar metrics with automated qualitative evaluation as a vehicle for model improvement. QualEval uses a powerful LLM reasoner and our novel flexible linear programming solver to generate human-readable insights that when applied, accelerate model improvement. The insights are backed by a comprehensive dashboard with fine-grained visualizations and human-interpretable analyses. We corroborate the faithfulness of QualEval by demonstrating that leveraging its insights, for example, improves the absolute performance of the Llama 2 model by up to 15% points relative on a challenging dialogue task (DialogSum) when compared to baselines. 
QualEval successfully increases the pace of model development, thus in essence serving as a data-scientist-in-a-box. Given the focus on critiquing and improving current evaluation metrics, our method serves as a refreshingly new technique for both model evaluation and improvement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02807v2-abstract-full').style.display = 'none'; document.getElementById('2311.02807v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.12972">arXiv:2310.12972</a> <span> [<a href="https://arxiv.org/pdf/2310.12972">pdf</a>, <a href="https://arxiv.org/format/2310.12972">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> CCIL: Continuity-based Data Augmentation for Corrective Imitation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ke%2C+L">Liyiming Ke</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yunchu Zhang</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Abhay Deshpande</a>, <a href="/search/cs?searchtype=author&query=Srinivasa%2C+S">Siddhartha Srinivasa</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Abhishek Gupta</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.12972v2-abstract-short" style="display: inline;"> We present a new technique to enhance the robustness of imitation learning methods by generating corrective data to account for compounding errors and disturbances. While existing methods rely on interactive expert labeling, additional offline datasets, or domain-specific invariances, our approach requires minimal additional assumptions beyond access to expert data. The key insight is to leverage… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.12972v2-abstract-full').style.display = 'inline'; document.getElementById('2310.12972v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.12972v2-abstract-full" style="display: none;"> We present a new technique to enhance the robustness of imitation learning methods by generating corrective data to account for compounding errors and disturbances. While existing methods rely on interactive expert labeling, additional offline datasets, or domain-specific invariances, our approach requires minimal additional assumptions beyond access to expert data. The key insight is to leverage local continuity in the environment dynamics to generate corrective labels. Our method first constructs a dynamics model from the expert demonstration, encouraging local Lipschitz continuity in the learned model. In locally continuous regions, this model allows us to generate corrective labels within the neighborhood of the demonstrations but beyond the actual set of states and actions in the dataset. Training on this augmented data enhances the agent's ability to recover from perturbations and deal with compounding errors. 
We demonstrate the effectiveness of our generated labels through experiments in a variety of robotics domains in simulation that have distinct forms of continuity and discontinuity, including classic control problems, drone flying, navigation with high-dimensional sensor observations, legged locomotion, and tabletop manipulation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.12972v2-abstract-full').style.display = 'none'; document.getElementById('2310.12972v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.10294">arXiv:2310.10294</a> <span> [<a href="https://arxiv.org/pdf/2310.10294">pdf</a>, <a href="https://arxiv.org/format/2310.10294">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Key-phrase boosted unsupervised summary generation for FinTech organization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Aadit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Goyal%2C+S">Shreya Goyal</a>, <a href="/search/cs?searchtype=author&query=Nagwanshi%2C+P">Prateek Nagwanshi</a>, <a href="/search/cs?searchtype=author&query=Tripathy%2C+A">Avinash Tripathy</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.10294v1-abstract-short" style="display: inline;"> With the recent advances in social media, the use of NLP techniques in social media data analysis has become an emerging research direction. Business organizations can particularly benefit from such an analysis of social media discourse, providing an external perspective on consumer behavior. Some of the NLP applications such as intent detection, sentiment classification, text summarization can he… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10294v1-abstract-full').style.display = 'inline'; document.getElementById('2310.10294v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.10294v1-abstract-full" style="display: none;"> With the recent advances in social media, the use of NLP techniques in social media data analysis has become an emerging research direction. Business organizations can particularly benefit from such an analysis of social media discourse, providing an external perspective on consumer behavior. Some of the NLP applications such as intent detection, sentiment classification, text summarization can help FinTech organizations to utilize the social media language data to find useful external insights and can be further utilized for downstream NLP tasks. Particularly, a summary which highlights the intents and sentiments of the users can be very useful for these organizations to get an external perspective. This external perspective can help organizations to better manage their products, offers, promotional campaigns, etc. However, certain challenges, such as a lack of labeled domain-specific datasets impede further exploration of these tasks in the FinTech domain. 
To overcome these challenges, we design an unsupervised phrase-based summary generation from social media data, using 'Action-Object' pairs (intent phrases). We evaluated the proposed method with other key-phrase based summary generation methods in the direction of contextual information of various Reddit discussion threads, available in the different summaries. We introduce certain "Context Metrics" such as the number of Unique words, Action-Object pairs, and Noun chunks to evaluate the contextual information retrieved from the source text in these phrase-based summaries. We demonstrate that our methods significantly outperform the baseline on these metrics, thus providing a qualitative and quantitative measure of their efficacy. Proposed framework has been leveraged as a web utility portal hosted within Amex. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.10294v1-abstract-full').style.display = 'none'; document.getElementById('2310.10294v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.01892">arXiv:2310.01892</a> <span> [<a href="https://arxiv.org/pdf/2310.01892">pdf</a>, <a href="https://arxiv.org/ps/2310.01892">ps</a>, <a href="https://arxiv.org/format/2310.01892">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FiGURe: Simple and Efficient Unsupervised Node Representations with Filter Augmentations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ekbote%2C+C">Chanakya Ekbote</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+P">Ajinkya Pankaj Deshpande</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+A">Arun Iyer</a>, <a href="/search/cs?searchtype=author&query=Bairi%2C+R">Ramakrishna Bairi</a>, <a href="/search/cs?searchtype=author&query=Sellamanickam%2C+S">Sundararajan Sellamanickam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.01892v2-abstract-short" style="display: inline;"> Unsupervised node representations learnt using contrastive learning-based methods have shown good performance on downstream tasks. However, these methods rely on augmentations that mimic low-pass filters, limiting their performance on tasks requiring different eigen-spectrum parts. 
This paper presents a simple filter-based augmentation method to capture different parts of the eigen-spectrum. We sh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.01892v2-abstract-full').style.display = 'inline'; document.getElementById('2310.01892v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.01892v2-abstract-full" style="display: none;"> Unsupervised node representations learnt using contrastive learning-based methods have shown good performance on downstream tasks. However, these methods rely on augmentations that mimic low-pass filters, limiting their performance on tasks requiring different eigen-spectrum parts. This paper presents a simple filter-based augmentation method to capture different parts of the eigen-spectrum. We show significant improvements using these augmentations. Further, we show that sharing the same weights across these different filter augmentations is possible, reducing the computational load. In addition, previous works have shown that good performance on downstream tasks requires high dimensional representations. Working with high dimensions increases the computations, especially when multiple augmentations are involved. We mitigate this problem and recover good performance through lower dimensional embeddings using simple random Fourier feature projections. Our method, FiGURe achieves an average gain of up to 4.4%, compared to the state-of-the-art unsupervised models, across all datasets in consideration, both homophilic and heterophilic. Our code can be found at: https://github.com/microsoft/figure. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.01892v2-abstract-full').style.display = 'none'; document.getElementById('2310.01892v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.03750">arXiv:2309.03750</a> <span> [<a href="https://arxiv.org/pdf/2309.03750">pdf</a>, <a href="https://arxiv.org/format/2309.03750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PBP: Path-based Trajectory Prediction for Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Afshar%2C+S">Sepideh Afshar</a>, <a href="/search/cs?searchtype=author&query=Deo%2C+N">Nachiket Deo</a>, <a href="/search/cs?searchtype=author&query=Bhagat%2C+A">Akshay Bhagat</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+T">Titas Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yunming Shao</a>, <a href="/search/cs?searchtype=author&query=Buddharaju%2C+B+R">Balarama Raju Buddharaju</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Adwait Deshpande</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+H">Henggang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2309.03750v2-abstract-short" style="display: inline;"> Trajectory prediction plays a crucial role in the autonomous driving stack by enabling autonomous vehicles to anticipate the motion of surrounding agents. Goal-based prediction models have gained traction in recent years for addressing the multimodal nature of future trajectories. Goal-based prediction models simplify multimodal prediction by first predicting 2D goal locations of agents and then p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.03750v2-abstract-full').style.display = 'inline'; document.getElementById('2309.03750v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.03750v2-abstract-full" style="display: none;"> Trajectory prediction plays a crucial role in the autonomous driving stack by enabling autonomous vehicles to anticipate the motion of surrounding agents. Goal-based prediction models have gained traction in recent years for addressing the multimodal nature of future trajectories. Goal-based prediction models simplify multimodal prediction by first predicting 2D goal locations of agents and then predicting trajectories conditioned on each goal. However, a single 2D goal location serves as a weak inductive bias for predicting the whole trajectory, often leading to poor map compliance, i.e., part of the trajectory going off-road or breaking traffic rules. In this paper, we improve upon goal-based prediction by proposing the Path-based prediction (PBP) approach. PBP predicts a discrete probability distribution over reference paths in the HD map using the path features and predicts trajectories in the path-relative Frenet frame. We applied the PBP trajectory decoder on top of the HiVT scene encoder and report results on the Argoverse dataset. 
Our experiments show that PBP achieves competitive performance on the standard trajectory prediction metrics, while significantly outperforming state-of-the-art baselines in terms of map compliance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.03750v2-abstract-full').style.display = 'none'; document.getElementById('2309.03750v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ICRA 2024; Sepideh Afshar and Nachiket Deo contributed equally</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.02710">arXiv:2309.02710</a> <span> [<a href="https://arxiv.org/pdf/2309.02710">pdf</a>, <a href="https://arxiv.org/ps/2309.02710">ps</a>, <a href="https://arxiv.org/format/2309.02710">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Improved Outlier Robust Seeding for k-means </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Pratap%2C+R">Rameshwar 
Pratap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.02710v1-abstract-short" style="display: inline;"> The $k$-means is a popular clustering objective, although it is inherently non-robust and sensitive to outliers. Its popular seeding or initialization called $k$-means++ uses $D^{2}$ sampling and comes with a provable $O(\log k)$ approximation guarantee \cite{AV2007}. However, in the presence of adversarial noise or outliers, $D^{2}$ sampling is more likely to pick centers from distant outliers in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02710v1-abstract-full').style.display = 'inline'; document.getElementById('2309.02710v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.02710v1-abstract-full" style="display: none;"> The $k$-means is a popular clustering objective, although it is inherently non-robust and sensitive to outliers. Its popular seeding or initialization called $k$-means++ uses $D^{2}$ sampling and comes with a provable $O(\log k)$ approximation guarantee \cite{AV2007}. However, in the presence of adversarial noise or outliers, $D^{2}$ sampling is more likely to pick centers from distant outliers instead of inlier clusters, and therefore its approximation guarantees \textit{w.r.t.} $k$-means solution on inliers, does not hold. Assuming that the outliers constitute a constant fraction of the given data, we propose a simple variant in the $D^2$ sampling distribution, which makes it robust to the outliers. Our algorithm runs in $O(ndk)$ time, outputs $O(k)$ clusters, discards marginally more points than the optimal number of outliers, and comes with a provable $O(1)$ approximation guarantee. 
Our algorithm can also be modified to output exactly $k$ clusters instead of $O(k)$ clusters, while keeping its running time linear in $n$ and $d$. This is an improvement over previous results for robust $k$-means based on LP relaxation and rounding \cite{Charikar}, \cite{KrishnaswamyLS18} and \textit{robust $k$-means++} \cite{DeshpandeKP20}. Our empirical results show the advantage of our algorithm over $k$-means++~\cite{AV2007}, uniform random seeding, greedy sampling for $k$ means~\cite{tkmeanspp}, and robust $k$-means++~\cite{DeshpandeKP20}, on standard real-world and synthetic data sets used in previous work. Our proposal is easily amenable to scalable, faster, parallel implementations of $k$-means++ \cite{Bahmani,BachemL017} and is of independent interest for coreset constructions in the presence of outliers \cite{feldman2007ptas,langberg2010universal,feldman2011unified}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02710v1-abstract-full').style.display = 'none'; document.getElementById('2309.02710v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.00133">arXiv:2309.00133</a> <span> [<a href="https://arxiv.org/pdf/2309.00133">pdf</a>, <a href="https://arxiv.org/format/2309.00133">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Distraction-free Embeddings for Robust VQA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dogra%2C+A">Atharvan Dogra</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+D">Deeksha Varshney</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+N">Neeraj Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.00133v1-abstract-short" style="display: inline;"> The generation of effective latent representations and their subsequent refinement to incorporate precise information is an essential prerequisite for Vision-Language Understanding (VLU) tasks such as Video Question Answering (VQA). 
However, most existing methods for VLU focus on sparsely sampling or fine-graining the input information (e.g., sampling a sparse set of frames or text tokens), or add… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00133v1-abstract-full').style.display = 'inline'; document.getElementById('2309.00133v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.00133v1-abstract-full" style="display: none;"> The generation of effective latent representations and their subsequent refinement to incorporate precise information is an essential prerequisite for Vision-Language Understanding (VLU) tasks such as Video Question Answering (VQA). However, most existing methods for VLU focus on sparsely sampling or fine-graining the input information (e.g., sampling a sparse set of frames or text tokens), or adding external knowledge. We present a novel "DRAX: Distraction Removal and Attended Cross-Alignment" method to rid our cross-modal representations of distractors in the latent space. We do not exclusively confine the perception of any input information from various modalities but instead use an attention-guided distraction removal method to increase focus on task-relevant information in latent embeddings. DRAX also ensures semantic alignment of embeddings during cross-modal fusions. We evaluate our approach on a challenging benchmark (SUTD-TrafficQA dataset), testing the framework's abilities for feature and event queries, temporal relation understanding, forecasting, hypothesis, and causal analysis through extensive experiments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.00133v1-abstract-full').style.display = 'none'; document.getElementById('2309.00133v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13242">arXiv:2308.13242</a> <span> [<a href="https://arxiv.org/pdf/2308.13242">pdf</a>, <a href="https://arxiv.org/format/2308.13242">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Group-Fair Plackett-Luce Ranking Models for Relevance and Ex-Post Fairness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gorantla%2C+S">Sruthi Gorantla</a>, <a href="/search/cs?searchtype=author&query=Bhansali%2C+E">Eshaan Bhansali</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Louis%2C+A">Anand Louis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13242v1-abstract-short" style="display: inline;"> In learning-to-rank (LTR), optimizing only the relevance (or the expected ranking utility) can cause representational harm to certain categories of items. 
Moreover, if there is implicit bias in the relevance scores, LTR models may fail to optimize for true relevance. Previous works have proposed efficient algorithms to train stochastic ranking models that achieve fairness of exposure to the groups… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13242v1-abstract-full').style.display = 'inline'; document.getElementById('2308.13242v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13242v1-abstract-full" style="display: none;"> In learning-to-rank (LTR), optimizing only the relevance (or the expected ranking utility) can cause representational harm to certain categories of items. Moreover, if there is implicit bias in the relevance scores, LTR models may fail to optimize for true relevance. Previous works have proposed efficient algorithms to train stochastic ranking models that achieve fairness of exposure to the groups ex-ante (or, in expectation), which may not guarantee representation fairness to the groups ex-post, that is, after realizing a ranking from the stochastic ranking model. Typically, ex-post fairness is achieved by post-processing, but previous work does not train stochastic ranking models that are aware of this post-processing. In this paper, we propose a novel objective that maximizes expected relevance only over those rankings that satisfy given representation constraints to ensure ex-post fairness. Building upon recent work on an efficient sampler for ex-post group-fair rankings, we propose a group-fair Plackett-Luce model and show that it can be efficiently optimized for our objective in the LTR framework. Experiments on three real-world datasets show that our group-fair algorithm guarantees fairness alongside usually having better relevance compared to the LTR baselines. 
In addition, our algorithm also achieves better relevance than post-processing baselines, which also ensures ex-post fairness. Further, when implicit bias is injected into the training data, our algorithm typically outperforms existing LTR baselines in relevance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13242v1-abstract-full').style.display = 'none'; document.getElementById('2308.13242v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.14910">arXiv:2307.14910</a> <span> [<a href="https://arxiv.org/pdf/2307.14910">pdf</a>, <a href="https://arxiv.org/format/2307.14910">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MedComNet58619.2023.10168852">10.1109/MedComNet58619.2023.10168852 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Low-Latency Massive Access with Multicast Wake Up Radio </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+A">Anay Ajit Deshpande</a>, <a 
href="/search/cs?searchtype=author&query=Chiariotti%2C+F">Federico Chiariotti</a>, <a href="/search/cs?searchtype=author&query=Zanella%2C+A">Andrea Zanella</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.14910v1-abstract-short" style="display: inline;"> The use of Wake-Up Radio (WUR) in Internet of Things (IoT) networks can significantly improve their energy efficiency: battery-powered sensors can remain in a low-power (sleep) mode while listening for wake-up messages using their WUR and reactivate only when polled, saving energy. However, polling-based Time Division Multiple Access (TDMA) may significantly increase data transmission delay if pac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.14910v1-abstract-full').style.display = 'inline'; document.getElementById('2307.14910v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.14910v1-abstract-full" style="display: none;"> The use of Wake-Up Radio (WUR) in Internet of Things (IoT) networks can significantly improve their energy efficiency: battery-powered sensors can remain in a low-power (sleep) mode while listening for wake-up messages using their WUR and reactivate only when polled, saving energy. However, polling-based Time Division Multiple Access (TDMA) may significantly increase data transmission delay if packets are generated sporadically, as nodes with no information still need to be polled. In this paper, we examine the effect of multicast polling for WUR-enabled wireless nodes. The idea is to assign nodes to multicast groups so that all nodes in the same group can be solicited by a multicast polling message. This may cause collisions, which can be solved by requesting retransmissions from the involved nodes. 
We analyze the performance of different multicast polling and retransmission strategies, showing that the optimal approach can significantly reduce the delay over TDMA and ALOHA in low-traffic scenarios while keeping good energy efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.14910v1-abstract-full').style.display = 'none'; document.getElementById('2307.14910v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2023 21st Mediterranean Communication and Computer Networking Conference (MedComNet)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.08593">arXiv:2307.08593</a> <span> [<a href="https://arxiv.org/pdf/2307.08593">pdf</a>, <a href="https://arxiv.org/format/2307.08593">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Accelerator Physics">physics.acc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Theory">nucl-th</span> </div> </div> <p class="title is-5 mathjax"> Artificial Intelligence for the Electron Ion Collider (AI4EIC) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Allaire%2C+C">C. Allaire</a>, <a href="/search/cs?searchtype=author&query=Ammendola%2C+R">R. Ammendola</a>, <a href="/search/cs?searchtype=author&query=Aschenauer%2C+E+-">E. -C. Aschenauer</a>, <a href="/search/cs?searchtype=author&query=Balandat%2C+M">M. Balandat</a>, <a href="/search/cs?searchtype=author&query=Battaglieri%2C+M">M. Battaglieri</a>, <a href="/search/cs?searchtype=author&query=Bernauer%2C+J">J. Bernauer</a>, <a href="/search/cs?searchtype=author&query=Bond%C3%AC%2C+M">M. Bondì</a>, <a href="/search/cs?searchtype=author&query=Branson%2C+N">N. Branson</a>, <a href="/search/cs?searchtype=author&query=Britton%2C+T">T. Britton</a>, <a href="/search/cs?searchtype=author&query=Butter%2C+A">A. Butter</a>, <a href="/search/cs?searchtype=author&query=Chahrour%2C+I">I. Chahrour</a>, <a href="/search/cs?searchtype=author&query=Chatagnon%2C+P">P. Chatagnon</a>, <a href="/search/cs?searchtype=author&query=Cisbani%2C+E">E. Cisbani</a>, <a href="/search/cs?searchtype=author&query=Cline%2C+E+W">E. W. Cline</a>, <a href="/search/cs?searchtype=author&query=Dash%2C+S">S. Dash</a>, <a href="/search/cs?searchtype=author&query=Dean%2C+C">C. Dean</a>, <a href="/search/cs?searchtype=author&query=Deconinck%2C+W">W. Deconinck</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">A. Deshpande</a>, <a href="/search/cs?searchtype=author&query=Diefenthaler%2C+M">M. Diefenthaler</a>, <a href="/search/cs?searchtype=author&query=Ent%2C+R">R. Ent</a>, <a href="/search/cs?searchtype=author&query=Fanelli%2C+C">C. Fanelli</a>, <a href="/search/cs?searchtype=author&query=Finger%2C+M">M. Finger</a>, <a href="/search/cs?searchtype=author&query=Finger%2C%2C+M">M. Finger, Jr.</a>, <a href="/search/cs?searchtype=author&query=Fol%2C+E">E. Fol</a>, <a href="/search/cs?searchtype=author&query=Furletov%2C+S">S. Furletov</a> , et al. 
(70 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.08593v1-abstract-short" style="display: inline;"> The Electron-Ion Collider (EIC), a state-of-the-art facility for studying the strong force, is expected to begin commissioning its first experiments in 2028. This is an opportune time for artificial intelligence (AI) to be included from the start at this facility and in all phases that lead up to the experiments. The second annual workshop organized by the AI4EIC working group, which recently took… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.08593v1-abstract-full').style.display = 'inline'; document.getElementById('2307.08593v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.08593v1-abstract-full" style="display: none;"> The Electron-Ion Collider (EIC), a state-of-the-art facility for studying the strong force, is expected to begin commissioning its first experiments in 2028. This is an opportune time for artificial intelligence (AI) to be included from the start at this facility and in all phases that lead up to the experiments. The second annual workshop organized by the AI4EIC working group, which recently took place, centered on exploring all current and prospective application areas of AI for the EIC. This workshop is not only beneficial for the EIC, but also provides valuable insights for the newly established ePIC collaboration at EIC. This paper summarizes the different activities and R&D projects covered across the sessions of the workshop and provides an overview of the goals, approaches and strategies regarding AI/ML in the EIC community, as well as cutting-edge techniques currently studied in other experiments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.08593v1-abstract-full').style.display = 'none'; document.getElementById('2307.08593v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 11 figures, AI4EIC workshop, tutorials and hackathon</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.00259">arXiv:2307.00259</a> <span> [<a href="https://arxiv.org/pdf/2307.00259">pdf</a>, <a href="https://arxiv.org/format/2307.00259">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> InstructEval: Systematic Evaluation of Instruction Selection Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ajith%2C+A">Anirudh Ajith</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+C">Chris Pan</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+M">Mengzhou Xia</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.00259v2-abstract-short" 
style="display: inline;"> In-context learning (ICL) performs tasks by prompting a large language model (LLM) using an instruction and a small set of annotated examples called demonstrations. Recent work has shown that precise details of the inputs used in the ICL prompt significantly impact performance, which has incentivized instruction selection algorithms. The effect of instruction-choice however is severely underexplor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.00259v2-abstract-full').style.display = 'inline'; document.getElementById('2307.00259v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.00259v2-abstract-full" style="display: none;"> In-context learning (ICL) performs tasks by prompting a large language model (LLM) using an instruction and a small set of annotated examples called demonstrations. Recent work has shown that precise details of the inputs used in the ICL prompt significantly impact performance, which has incentivized instruction selection algorithms. The effect of instruction-choice however is severely underexplored, with existing analyses restricted to shallow subsets of models and tasks, limiting the generalizability of their insights. We develop InstructEval, an ICL evaluation suite to conduct a thorough assessment of these techniques. The suite includes 13 open-sourced LLMs of varying scales from four model families, and covers nine tasks across three categories. Using the suite, we evaluate the relative performance of seven popular instruction selection methods over five metrics relevant to ICL. Our experiments reveal that using curated manually-written instructions or simple instructions without any task-specific descriptions often elicits superior ICL performance overall than that of automatic instruction-induction methods, pointing to a lack of generalizability among the latter. 
We release our evaluation suite for benchmarking instruction selection approaches and enabling more generalizable methods in this space. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.00259v2-abstract-full').style.display = 'none'; document.getElementById('2307.00259v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 content pages + 3 pages of supplementary material, 3 figures, 10 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.11964">arXiv:2306.11964</a> <span> [<a href="https://arxiv.org/pdf/2306.11964">pdf</a>, <a href="https://arxiv.org/format/2306.11964">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sampling Individually-Fair Rankings that are Always Group Fair </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Gorantla%2C+S">Sruthi Gorantla</a>, <a href="/search/cs?searchtype=author&query=Mehrotra%2C+A">Anay Mehrotra</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Louis%2C+A">Anand Louis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.11964v1-abstract-short" style="display: inline;"> Rankings on online platforms help their end-users find the relevant information -- people, news, media, and products -- quickly. Fair ranking tasks, which ask to rank a set of items to maximize utility subject to satisfying group-fairness constraints, have gained significant interest in the Algorithmic Fairness, Information Retrieval, and Machine Learning literature. Recent works, however, identif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11964v1-abstract-full').style.display = 'inline'; document.getElementById('2306.11964v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.11964v1-abstract-full" style="display: none;"> Rankings on online platforms help their end-users find the relevant information -- people, news, media, and products -- quickly. Fair ranking tasks, which ask to rank a set of items to maximize utility subject to satisfying group-fairness constraints, have gained significant interest in the Algorithmic Fairness, Information Retrieval, and Machine Learning literature. Recent works, however, identify uncertainty in the utilities of items as a primary cause of unfairness and propose introducing randomness in the output. This randomness is carefully chosen to guarantee an adequate representation of each item (while accounting for the uncertainty). 
However, due to this randomness, the output rankings may violate group fairness constraints. We give an efficient algorithm that samples rankings from an individually-fair distribution while ensuring that every output ranking is group fair. The expected utility of the output ranking is at least $α$ times the utility of the optimal fair solution. Here, $α$ depends on the utilities, position-discounts, and constraints -- it approaches 1 as the range of utilities or the position-discounts shrinks, or when utilities satisfy distributional assumptions. Empirically, we observe that our algorithm achieves individual and group fairness and that Pareto dominates the state-of-the-art baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11964v1-abstract-full').style.display = 'none'; document.getElementById('2306.11964v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Full version of a paper accepted for presentation in ACM AIES 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.11072">arXiv:2306.11072</a> <span> [<a href="https://arxiv.org/pdf/2306.11072">pdf</a>, <a href="https://arxiv.org/format/2306.11072">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Causal Effect Regularization: Automated Detection and Removal of Spurious Attributes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Abhinav Kumar</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+A">Amit Sharma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.11072v2-abstract-short" style="display: inline;"> In many classification datasets, the task labels are spuriously correlated with some input attributes. Classifiers trained on such datasets often rely on these attributes for prediction, especially when the spurious correlation is high, and thus fail to generalize whenever there is a shift in the attributes' correlation at deployment. 
If we assume that the spurious attributes are known a priori, s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11072v2-abstract-full').style.display = 'inline'; document.getElementById('2306.11072v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.11072v2-abstract-full" style="display: none;"> In many classification datasets, the task labels are spuriously correlated with some input attributes. Classifiers trained on such datasets often rely on these attributes for prediction, especially when the spurious correlation is high, and thus fail to generalize whenever there is a shift in the attributes' correlation at deployment. If we assume that the spurious attributes are known a priori, several methods have been proposed to learn a classifier that is invariant to the specified attributes. However, in real-world data, information about spurious attributes is typically unavailable. Therefore, we propose a method to automatically identify spurious attributes by estimating their causal effect on the label and then use a regularization objective to mitigate the classifier's reliance on them. Compared to a recent method for identifying spurious attributes, we find that our method is more accurate in removing the attribute from the learned model, especially when spurious correlation is high. Specifically, across synthetic, semi-synthetic, and real-world datasets, our method shows significant improvement in a metric used to quantify the dependence of a classifier on spurious attributes ($Δ$Prob), while obtaining better or similar accuracy. In addition, our method mitigates the reliance on spurious attributes even under noisy estimation of causal effects. To explain the empirical robustness of our method, we create a simple linear classification task with two sets of attributes: causal and spurious. 
We prove that our method only requires that the ranking of estimated causal effects is correct across attributes to select the correct classifier. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11072v2-abstract-full').style.display = 'none'; document.getElementById('2306.11072v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15093">arXiv:2305.15093</a> <span> [<a href="https://arxiv.org/pdf/2305.15093">pdf</a>, <a href="https://arxiv.org/format/2305.15093">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> C-STS: Conditional Semantic Textual Similarity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Jimenez%2C+C+E">Carlos E. 
Jimenez</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Howard Chen</a>, <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a>, <a href="/search/cs?searchtype=author&query=Graf%2C+V">Victoria Graf</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Danqi Chen</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15093v2-abstract-short" style="display: inline;"> Semantic textual similarity (STS), a cornerstone task in NLP, measures the degree of similarity between a pair of sentences, and has broad application in fields such as information retrieval and natural language understanding. However, sentence similarity can be inherently ambiguous, depending on the specific aspect of interest. We resolve this ambiguity by proposing a novel task called Conditiona… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15093v2-abstract-full').style.display = 'inline'; document.getElementById('2305.15093v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15093v2-abstract-full" style="display: none;"> Semantic textual similarity (STS), a cornerstone task in NLP, measures the degree of similarity between a pair of sentences, and has broad application in fields such as information retrieval and natural language understanding. However, sentence similarity can be inherently ambiguous, depending on the specific aspect of interest. 
We resolve this ambiguity by proposing a novel task called Conditional STS (C-STS) which measures sentences' similarity conditioned on an feature described in natural language (hereon, condition). As an example, the similarity between the sentences "The NBA player shoots a three-pointer." and "A man throws a tennis ball into the air to serve." is higher for the condition "The motion of the ball" (both upward) and lower for "The size of the ball" (one large and one small). C-STS's advantages are two-fold: (1) it reduces the subjectivity and ambiguity of STS and (2) enables fine-grained language model evaluation through diverse natural language conditions. We put several state-of-the-art models to the test, and even those performing well on STS (e.g. SimCSE, Flan-T5, and GPT-4) find C-STS challenging; all with Spearman correlation scores below 50. To encourage a more comprehensive evaluation of semantic similarity and natural language understanding, we make nearly 19K C-STS examples and code available for others to train and test their models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15093v2-abstract-full').style.display = 'none'; document.getElementById('2305.15093v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14784">arXiv:2305.14784</a> <span> [<a href="https://arxiv.org/pdf/2305.14784">pdf</a>, <a href="https://arxiv.org/format/2305.14784">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Anthropomorphization of AI: Opportunities and Risks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14784v1-abstract-short" style="display: inline;"> Anthropomorphization is the tendency to attribute human-like traits to non-human entities. It is prevalent in many social contexts -- children anthropomorphize toys, adults do so with brands, and it is a literary device. 
It is also a versatile tool in science, with behavioral psychology and evolutionary biology meticulously documenting its consequences. With widespread adoption of AI systems, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14784v1-abstract-full').style.display = 'inline'; document.getElementById('2305.14784v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14784v1-abstract-full" style="display: none;"> Anthropomorphization is the tendency to attribute human-like traits to non-human entities. It is prevalent in many social contexts -- children anthropomorphize toys, adults do so with brands, and it is a literary device. It is also a versatile tool in science, with behavioral psychology and evolutionary biology meticulously documenting its consequences. With widespread adoption of AI systems, and the push from stakeholders to make it human-like through alignment techniques, human voice, and pictorial avatars, the tendency for users to anthropomorphize it increases significantly. We take a dyadic approach to understanding this phenomenon with large language models (LLMs) by studying (1) the objective legal implications, as analyzed through the lens of the recent blueprint of AI bill of rights and the (2) subtle psychological aspects customization and anthropomorphization. We find that anthropomorphized LLMs customized for different user bases violate multiple provisions in the legislative blueprint. In addition, we point out that anthropomorphization of LLMs affects the influence they can have on their users, thus having the potential to fundamentally change the nature of human-AI interaction, with potential for manipulation and negative influence. With LLMs being hyper-personalized for vulnerable groups like children and patients among others, our work is a timely and important contribution. 
We propose a conservative strategy for the cautious use of anthropomorphization to improve trustworthiness of AI systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14784v1-abstract-full').style.display = 'none'; document.getElementById('2305.14784v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.05335">arXiv:2304.05335</a> <span> [<a href="https://arxiv.org/pdf/2304.05335">pdf</a>, <a href="https://arxiv.org/format/2304.05335">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Toxicity in ChatGPT: Analyzing Persona-assigned Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a>, <a href="/search/cs?searchtype=author&query=Rajpurohit%2C+T">Tanmay Rajpurohit</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2304.05335v1-abstract-short" style="display: inline;"> Large language models (LLMs) have shown incredible capabilities and transcended the natural language processing (NLP) community, with adoption throughout many services like healthcare, therapy, education, and customer service. Since users include people with critical information needs like students or patients engaging with chatbots, the safety of these systems is of prime importance. Therefore, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.05335v1-abstract-full').style.display = 'inline'; document.getElementById('2304.05335v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.05335v1-abstract-full" style="display: none;"> Large language models (LLMs) have shown incredible capabilities and transcended the natural language processing (NLP) community, with adoption throughout many services like healthcare, therapy, education, and customer service. Since users include people with critical information needs like students or patients engaging with chatbots, the safety of these systems is of prime importance. Therefore, a clear understanding of the capabilities and limitations of LLMs is necessary. To this end, we systematically evaluate toxicity in over half a million generations of ChatGPT, a popular dialogue-based LLM. We find that setting the system parameter of ChatGPT by assigning it a persona, say that of the boxer Muhammad Ali, significantly increases the toxicity of generations. Depending on the persona assigned to ChatGPT, its toxicity can increase up to 6x, with outputs engaging in incorrect stereotypes, harmful dialogue, and hurtful opinions. This may be potentially defamatory to the persona and harmful to an unsuspecting user. 
Furthermore, we find concerning patterns where specific entities (e.g., certain races) are targeted more than others (3x more) irrespective of the assigned persona, that reflect inherent discriminatory biases in the model. We hope that our findings inspire the broader AI community to rethink the efficacy of current safety guardrails and develop better techniques that lead to robust, safe, and trustworthy AI systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.05335v1-abstract-full').style.display = 'none'; document.getElementById('2304.05335v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.05508">arXiv:2303.05508</a> <span> [<a href="https://arxiv.org/pdf/2303.05508">pdf</a>, <a href="https://arxiv.org/format/2303.05508">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Cherry-Picking with Reinforcement Learning : Robust Dynamic Grasping in Unstable Conditions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yunchu Zhang</a>, <a href="/search/cs?searchtype=author&query=Ke%2C+L">Liyiming Ke</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Abhay Deshpande</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Abhishek Gupta</a>, <a href="/search/cs?searchtype=author&query=Srinivasa%2C+S">Siddhartha Srinivasa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 
<span class="abstract-short has-text-grey-dark mathjax" id="2303.05508v2-abstract-short" style="display: inline;"> Grasping small objects surrounded by unstable or non-rigid material plays a crucial role in applications such as surgery, harvesting, construction, disaster recovery, and assisted feeding. This task is especially difficult when fine manipulation is required in the presence of sensor noise and perception errors; errors inevitably trigger dynamic motion, which is challenging to model precisely. Circ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05508v2-abstract-full').style.display = 'inline'; document.getElementById('2303.05508v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.05508v2-abstract-full" style="display: none;"> Grasping small objects surrounded by unstable or non-rigid material plays a crucial role in applications such as surgery, harvesting, construction, disaster recovery, and assisted feeding. This task is especially difficult when fine manipulation is required in the presence of sensor noise and perception errors; errors inevitably trigger dynamic motion, which is challenging to model precisely. Circumventing the difficulty to build accurate models for contacts and dynamics, data-driven methods like reinforcement learning (RL) can optimize task performance via trial and error, reducing the need for accurate models of contacts and dynamics. Applying RL methods to real robots, however, has been hindered by factors such as prohibitively high sample complexity or the high training infrastructure cost for providing resets on hardware. This work presents CherryBot, an RL system that uses chopsticks for fine manipulation that surpasses human reactiveness for some dynamic grasping tasks. 
By integrating imprecise simulators, suboptimal demonstrations and external state estimation, we study how to make a real-world robot learning system sample efficient and general while reducing the human effort required for supervision. Our system shows continual improvement through 30 minutes of real-world interaction: through reactive retry, it achieves an almost 100% success rate on the demanding task of using chopsticks to grasp small objects swinging in the air. We demonstrate the reactiveness, robustness and generalizability of CherryBot to varying object shapes and dynamics (e.g., external disturbances like wind and human perturbations). Videos are available at https://goodcherrybot.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05508v2-abstract-full').style.display = 'none'; document.getElementById('2303.05508v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.13191">arXiv:2302.13191</a> <span> [<a href="https://arxiv.org/pdf/2302.13191">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCDS.2023.3250393">10.1109/TCDS.2023.3250393 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DeepCPG Policies for Robot Locomotion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+M">Aditya M. Deshpande</a>, <a href="/search/cs?searchtype=author&query=Hurd%2C+E">Eric Hurd</a>, <a href="/search/cs?searchtype=author&query=Minai%2C+A+A">Ali A. Minai</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+M">Manish Kumar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.13191v1-abstract-short" style="display: inline;"> Central Pattern Generators (CPGs) form the neural basis of the observed rhythmic behaviors for locomotion in legged animals. 
The CPG dynamics organized into networks allow the emergence of complex locomotor behaviors. In this work, we take this inspiration for developing walking behaviors in multi-legged robots. We present novel DeepCPG policies that embed CPGs as a layer in a larger neural networ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.13191v1-abstract-full').style.display = 'inline'; document.getElementById('2302.13191v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.13191v1-abstract-full" style="display: none;"> Central Pattern Generators (CPGs) form the neural basis of the observed rhythmic behaviors for locomotion in legged animals. The CPG dynamics organized into networks allow the emergence of complex locomotor behaviors. In this work, we take this inspiration for developing walking behaviors in multi-legged robots. We present novel DeepCPG policies that embed CPGs as a layer in a larger neural network and facilitate end-to-end learning of locomotion behaviors in deep reinforcement learning (DRL) setup. We demonstrate the effectiveness of this approach on physics engine-based insectoid robots. We show that, compared to traditional approaches, DeepCPG policies allow sample-efficient end-to-end learning of effective locomotion strategies even in the case of high-dimensional sensor spaces (vision). We scale the DeepCPG policies using a modular robot configuration and multi-agent DRL. Our results suggest that gradual complexification with embedded priors of these policies in a modular fashion could achieve non-trivial sensor and motor integration on a robot platform. These results also indicate the efficacy of bootstrapping more complex intelligent systems from simpler ones based on biological principles. 
Finally, we present the experimental results for a proof-of-concept insectoid robot system for which DeepCPG learned policies initially using the simulation engine and these were afterwards transferred to real-world robots without any additional fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.13191v1-abstract-full').style.display = 'none'; document.getElementById('2302.13191v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint of paper accepted for publication in IEEE Transaction On Cognitive and Developmental Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.12441">arXiv:2302.12441</a> <span> [<a href="https://arxiv.org/pdf/2302.12441">pdf</a>, <a href="https://arxiv.org/format/2302.12441">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MUX-PLMs: Data Multiplexing for High-throughput Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Murahari%2C+V">Vishvak Murahari</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Jimenez%2C+C+E">Carlos E. 
Jimenez</a>, <a href="/search/cs?searchtype=author&query=Shafran%2C+I">Izhak Shafran</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mingqiu Wang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuan Cao</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.12441v2-abstract-short" style="display: inline;"> The widespread adoption of large language models such as ChatGPT and Bard has led to unprecedented demand for these technologies. The burgeoning cost of inference for ever-increasing model sizes coupled with hardware shortages has limited affordable access and poses a pressing need for efficiency approaches geared towards high throughput and performance. Multi-input multi-output (MIMO) algorithms… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12441v2-abstract-full').style.display = 'inline'; document.getElementById('2302.12441v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.12441v2-abstract-full" style="display: none;"> The widespread adoption of large language models such as ChatGPT and Bard has led to unprecedented demand for these technologies. The burgeoning cost of inference for ever-increasing model sizes coupled with hardware shortages has limited affordable access and poses a pressing need for efficiency approaches geared towards high throughput and performance. Multi-input multi-output (MIMO) algorithms, such as data multiplexing, offer a promising solution with a many-fold increase in throughput by performing inference for multiple inputs at the cost of a single input. Yet these approaches are not currently performant enough to be deployed in modern systems. 
We change that by developing MUX-PLMs, a class of high throughput pre-trained language models (PLMs) trained with data multiplexing, that can be fine-tuned for any downstream task to yield high-throughput high-performance. Our novel multiplexing and demultiplexing modules proficiently entangle and disentangle inputs, and enable high-performance high throughput MUX-PLMs that are competitive with vanilla PLMs while achieving 2x/5x inference speedup with only a $1-4\%$ drop on a broad suite of tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12441v2-abstract-full').style.display = 'none'; document.getElementById('2302.12441v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.05906">arXiv:2302.05906</a> <span> [<a href="https://arxiv.org/pdf/2302.05906">pdf</a>, <a href="https://arxiv.org/format/2302.05906">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> On Comparing Fair Classifiers under Data Bias </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+M">Mohit Sharma</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+R+R">Rajiv Ratn Shah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.05906v2-abstract-short" style="display: inline;"> In this paper, we consider a theoretical model for injecting data bias, namely, under-representation and label bias (Blum & Stangl, 2019). We empirically study the effect of varying data biases on the accuracy and fairness of fair classifiers. 
Through extensive experiments on both synthetic and real-world datasets (e.g., Adult, German Credit, Bank Marketing, COMPAS), we empirically audit pre-, in-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.05906v2-abstract-full').style.display = 'inline'; document.getElementById('2302.05906v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.05906v2-abstract-full" style="display: none;"> In this paper, we consider a theoretical model for injecting data bias, namely, under-representation and label bias (Blum & Stangl, 2019). We empirically study the effect of varying data biases on the accuracy and fairness of fair classifiers. Through extensive experiments on both synthetic and real-world datasets (e.g., Adult, German Credit, Bank Marketing, COMPAS), we empirically audit pre-, in-, and post-processing fair classifiers from standard fairness toolkits for their fairness and accuracy by injecting varying amounts of under-representation and label bias in their training data (but not the test data). Our main observations are: 1. The fairness and accuracy of many standard fair classifiers degrade severely as the bias injected in their training data increases, 2. A simple logistic regression model trained on the right data can often outperform, in both accuracy and fairness, most fair classifiers trained on biased training data, and 3. A few, simple fairness techniques (e.g., reweighing, exponentiated gradients) seem to offer stable accuracy and fairness guarantees even when their training data is injected with under-representation and label bias. Our experiments also show how to integrate a measure of data bias risk in the existing fairness dashboards for real-world deployments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.05906v2-abstract-full').style.display = 'none'; document.getElementById('2302.05906v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a Spotlight Presentation at Algorithmic Fairness through the Lens of Time, Neurips 2023 Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.11309">arXiv:2301.11309</a> <span> [<a href="https://arxiv.org/pdf/2301.11309">pdf</a>, <a href="https://arxiv.org/format/2301.11309">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SemSup-XC: Semantic Supervision for Zero and Few-shot Extreme Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Aggarwal%2C+P">Pranjal Aggarwal</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.11309v2-abstract-short" style="display: inline;"> Extreme classification (XC) involves predicting over large numbers of 
classes (thousands to millions), with real-world applications like news article classification and e-commerce product tagging. The zero-shot version of this task requires generalization to novel classes without additional supervision. In this paper, we develop SemSup-XC, a model that achieves state-of-the-art zero-shot and few-s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.11309v2-abstract-full').style.display = 'inline'; document.getElementById('2301.11309v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.11309v2-abstract-full" style="display: none;"> Extreme classification (XC) involves predicting over large numbers of classes (thousands to millions), with real-world applications like news article classification and e-commerce product tagging. The zero-shot version of this task requires generalization to novel classes without additional supervision. In this paper, we develop SemSup-XC, a model that achieves state-of-the-art zero-shot and few-shot performance on three XC datasets derived from legal, e-commerce, and Wikipedia data. To develop SemSup-XC, we use automatically collected semantic class descriptions to represent classes and facilitate generalization through a novel hybrid matching module that matches input instances to class descriptions using a combination of semantic and lexical similarity. Trained with contrastive learning, SemSup-XC significantly outperforms baselines and establishes state-of-the-art performance on all three datasets considered, gaining up to 12 precision points on zero-shot and more than 10 precision points on one-shot tests, with similar gains for recall@10. Our ablation studies highlight the relative importance of our hybrid matching module and automatically collected class descriptions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.11309v2-abstract-full').style.display = 'none'; document.getElementById('2301.11309v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ICML 2023. V2: camera ready version at ICML 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.09158">arXiv:2301.09158</a> <span> [<a href="https://arxiv.org/pdf/2301.09158">pdf</a>, <a href="https://arxiv.org/format/2301.09158">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Differential Spiral Joint Mechanism for Coupled Variable Stiffness Actuation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+M">Mincheol Kim</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ashish Deshpande</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.09158v1-abstract-short" style="display: inline;"> In this study, we present the Differential Spiral Joint (DSJ) mechanism for variable stiffness actuation in tendon-driven robots. 
The DSJ mechanism semi-decouples the modulation of position and mechanical stiffness, allowing independent trajectory tracking in different parameter space. Past studies show that increasing the mechanical stiffness achieves the wider range of renderable stiffness, wher… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09158v1-abstract-full').style.display = 'inline'; document.getElementById('2301.09158v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.09158v1-abstract-full" style="display: none;"> In this study, we present the Differential Spiral Joint (DSJ) mechanism for variable stiffness actuation in tendon-driven robots. The DSJ mechanism semi-decouples the modulation of position and mechanical stiffness, allowing independent trajectory tracking in different parameter space. Past studies show that increasing the mechanical stiffness achieves the wider range of renderable stiffness, whereas decreasing the mechanical stiffness improves the quality of actuator decoupling and shock absorbance. Therefore, it is often useful to modulate the mechanical stiffness to balance the required level of stiffness and safety. In addition, the DSJ mechanism offers a compact form factor, which is suitable for applications where the size and weight are important. The performance of the DSJ mechanism in various areas is validated through a set of experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09158v1-abstract-full').style.display = 'none'; document.getElementById('2301.09158v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10582">arXiv:2212.10582</a> <span> [<a href="https://arxiv.org/pdf/2212.10582">pdf</a>, <a href="https://arxiv.org/format/2212.10582">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PhysRevLett.131.030601">10.1103/PhysRevLett.131.030601 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Sharp complexity phase transitions generated by entanglement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ghosh%2C+S">Soumik Ghosh</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Abhinav Deshpande</a>, <a href="/search/cs?searchtype=author&query=Hangleiter%2C+D">Dominik Hangleiter</a>, <a href="/search/cs?searchtype=author&query=Gorshkov%2C+A+V">Alexey V. Gorshkov</a>, <a href="/search/cs?searchtype=author&query=Fefferman%2C+B">Bill Fefferman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10582v1-abstract-short" style="display: inline;"> Entanglement is one of the physical properties of quantum systems responsible for the computational hardness of simulating quantum systems. 
But while the runtime of specific algorithms, notably tensor network algorithms, explicitly depends on the amount of entanglement in the system, it is unknown whether this connection runs deeper and entanglement can also cause inherent, algorithm-independent c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10582v1-abstract-full').style.display = 'inline'; document.getElementById('2212.10582v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10582v1-abstract-full" style="display: none;"> Entanglement is one of the physical properties of quantum systems responsible for the computational hardness of simulating quantum systems. But while the runtime of specific algorithms, notably tensor network algorithms, explicitly depends on the amount of entanglement in the system, it is unknown whether this connection runs deeper and entanglement can also cause inherent, algorithm-independent complexity. In this work, we quantitatively connect the entanglement present in certain quantum systems to the computational complexity of simulating those systems. Moreover, we completely characterize the entanglement and complexity as a function of a system parameter. Specifically, we consider the task of simulating single-qubit measurements of $k$--regular graph states on $n$ qubits. We show that, as the regularity parameter is increased from $1$ to $n-1$, there is a sharp transition from an easy regime with low entanglement to a hard regime with high entanglement at $k=3$, and a transition back to easy and low entanglement at $k=n-3$. As a key technical result, we prove a duality for the simulation complexity of regular graph states between low and high regularity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10582v1-abstract-full').style.display = 'none'; document.getElementById('2212.10582v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.16634">arXiv:2211.16634</a> <span> [<a href="https://arxiv.org/pdf/2211.16634">pdf</a>, <a href="https://arxiv.org/format/2211.16634">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SPARTAN: Sparse Hierarchical Memory for Parameter-Efficient Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Sultan%2C+M+A">Md Arafat Sultan</a>, <a href="/search/cs?searchtype=author&query=Ferritto%2C+A">Anthony Ferritto</a>, <a href="/search/cs?searchtype=author&query=Kalyan%2C+A">Ashwin Kalyan</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a>, <a href="/search/cs?searchtype=author&query=Sil%2C+A">Avirup Sil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.16634v1-abstract-short" style="display: inline;"> 
Fine-tuning pre-trained language models (PLMs) achieves impressive performance on a range of downstream tasks, and their sizes have consequently been getting bigger. Since a different copy of the model is required for each task, this paradigm is infeasible for storage-constrained edge devices like mobile phones. In this paper, we propose SPARTAN, a parameter efficient (PE) and computationally fast… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.16634v1-abstract-full').style.display = 'inline'; document.getElementById('2211.16634v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.16634v1-abstract-full" style="display: none;"> Fine-tuning pre-trained language models (PLMs) achieves impressive performance on a range of downstream tasks, and their sizes have consequently been getting bigger. Since a different copy of the model is required for each task, this paradigm is infeasible for storage-constrained edge devices like mobile phones. In this paper, we propose SPARTAN, a parameter efficient (PE) and computationally fast architecture for edge devices that adds hierarchically organized sparse memory after each Transformer layer. SPARTAN freezes the PLM parameters and fine-tunes only its memory, thus significantly reducing storage costs by re-using the PLM backbone for different tasks. SPARTAN contains two levels of memory, with only a sparse subset of parents being chosen in the first level for each input, and children cells corresponding to those parents being used to compute an output representation. This sparsity combined with other architecture optimizations improves SPARTAN's throughput by over 90% during inference on a Raspberry Pi 4 when compared to PE baselines (adapters) while also outperforming the latter by 0.1 points on the GLUE benchmark. 
Further, it can be trained 34% faster in a few-shot setting, while performing within 0.9 points of adapters. Qualitative analysis shows that different parent cells in SPARTAN specialize in different topics, thus dividing responsibility efficiently. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.16634v1-abstract-full').style.display = 'none'; document.getElementById('2211.16634v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.08547">arXiv:2211.08547</a> <span> [<a href="https://arxiv.org/pdf/2211.08547">pdf</a>, <a href="https://arxiv.org/format/2211.08547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ALIGN-MLM: Word Embedding Alignment is Crucial for Multilingual Pre-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+H">Henry Tang</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.08547v1-abstract-short" 
style="display: inline;"> Multilingual pre-trained models exhibit zero-shot cross-lingual transfer, where a model fine-tuned on a source language achieves surprisingly good performance on a target language. While studies have attempted to understand transfer, they focus only on MLM, and the large number of differences between natural languages makes it hard to disentangle the importance of different properties. In this wor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.08547v1-abstract-full').style.display = 'inline'; document.getElementById('2211.08547v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.08547v1-abstract-full" style="display: none;"> Multilingual pre-trained models exhibit zero-shot cross-lingual transfer, where a model fine-tuned on a source language achieves surprisingly good performance on a target language. While studies have attempted to understand transfer, they focus only on MLM, and the large number of differences between natural languages makes it hard to disentangle the importance of different properties. In this work, we specifically highlight the importance of word embedding alignment by proposing a pre-training objective (ALIGN-MLM) whose auxiliary loss guides similar words in different languages to have similar word embeddings. ALIGN-MLM either outperforms or matches three widely adopted objectives (MLM, XLM, DICT-MLM) when we evaluate transfer between pairs of natural languages and their counterparts created by systematically modifying specific properties like the script. In particular, ALIGN-MLM outperforms XLM and MLM by 35 and 30 F1 points on POS-tagging for transfer between languages that differ both in their script and word order (left-to-right vs. right-to-left). 
We also show a strong correlation between alignment and transfer for all objectives (e.g., rho=0.727 for XNLI), which together with ALIGN-MLM's strong performance calls for explicitly aligning word embeddings for multilingual models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.08547v1-abstract-full').style.display = 'none'; document.getElementById('2211.08547v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.08388">arXiv:2211.08388</a> <span> [<a href="https://arxiv.org/pdf/2211.08388">pdf</a>, <a href="https://arxiv.org/format/2211.08388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/mnras/stac3336">10.1093/mnras/stac3336 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Photometric identification of compact galaxies, stars and quasars using multiple neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chaini%2C+S">Siddharth 
Chaini</a>, <a href="/search/cs?searchtype=author&query=Bagul%2C+A">Atharva Bagul</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Anish Deshpande</a>, <a href="/search/cs?searchtype=author&query=Gondkar%2C+R">Rishi Gondkar</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+K">Kaushal Sharma</a>, <a href="/search/cs?searchtype=author&query=Vivek%2C+M">M. Vivek</a>, <a href="/search/cs?searchtype=author&query=Kembhavi%2C+A">Ajit Kembhavi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.08388v1-abstract-short" style="display: inline;"> We present MargNet, a deep learning-based classifier for identifying stars, quasars and compact galaxies using photometric parameters and images from the Sloan Digital Sky Survey (SDSS) Data Release 16 (DR16) catalogue. MargNet consists of a combination of Convolutional Neural Network (CNN) and Artificial Neural Network (ANN) architectures. Using a carefully curated dataset consisting of 240,000 c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.08388v1-abstract-full').style.display = 'inline'; document.getElementById('2211.08388v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.08388v1-abstract-full" style="display: none;"> We present MargNet, a deep learning-based classifier for identifying stars, quasars and compact galaxies using photometric parameters and images from the Sloan Digital Sky Survey (SDSS) Data Release 16 (DR16) catalogue. MargNet consists of a combination of Convolutional Neural Network (CNN) and Artificial Neural Network (ANN) architectures. 
Using a carefully curated dataset consisting of 240,000 compact objects and an additional 150,000 faint objects, the machine learns classification directly from the data, minimising the need for human intervention. MargNet is the first classifier focusing exclusively on compact galaxies and performs better than other methods to classify compact galaxies from stars and quasars, even at fainter magnitudes. This model and feature engineering in such deep learning architectures will provide greater success in identifying objects in the ongoing and upcoming surveys, such as Dark Energy Survey (DES) and images from the Vera C. Rubin Observatory. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.08388v1-abstract-full').style.display = 'none'; document.getElementById('2211.08388v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 10 figures, Accepted for publication in MNRAS</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.06106">arXiv:2211.06106</a> <span> [<a href="https://arxiv.org/pdf/2211.06106">pdf</a>, <a href="https://arxiv.org/format/2211.06106">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Identifying, measuring, and mitigating individual unfairness for supervised learning models and application to credit risk models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shahsavarifar%2C+R">Rasoul Shahsavarifar</a>, <a href="/search/cs?searchtype=author&query=Chandran%2C+J">Jithu Chandran</a>, <a href="/search/cs?searchtype=author&query=Inchiosa%2C+M">Mario Inchiosa</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Schlener%2C+M">Mario Schlener</a>, <a href="/search/cs?searchtype=author&query=Gossain%2C+V">Vishal Gossain</a>, <a href="/search/cs?searchtype=author&query=Elias%2C+Y">Yara Elias</a>, <a href="/search/cs?searchtype=author&query=Murali%2C+V">Vinaya Murali</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.06106v1-abstract-short" style="display: inline;"> In the past few years, Artificial 
Intelligence (AI) has garnered attention from various industries including financial services (FS). AI has made a positive impact in financial services by enhancing productivity and improving risk management. While AI can offer efficient solutions, it has the potential to bring unintended consequences. One such consequence is the pronounced effect of AI-related un… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.06106v1-abstract-full').style.display = 'inline'; document.getElementById('2211.06106v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.06106v1-abstract-full" style="display: none;"> In the past few years, Artificial Intelligence (AI) has garnered attention from various industries including financial services (FS). AI has made a positive impact in financial services by enhancing productivity and improving risk management. While AI can offer efficient solutions, it has the potential to bring unintended consequences. One such consequence is the pronounced effect of AI-related unfairness and attendant fairness-related harms. These fairness-related harms could involve differential treatment of individuals; for example, unfairly denying a loan to certain individuals or groups of individuals. In this paper, we focus on identifying and mitigating individual unfairness and leveraging some of the recently published techniques in this domain, especially as applicable to the credit adjudication use case. We also investigate the extent to which techniques for achieving individual fairness are effective at achieving group fairness. Our main contribution in this work is functionalizing a two-step training process which involves learning a fair similarity metric from a group sense using a small portion of the raw data and training an individually "fair" classifier using the rest of the data where the sensitive features are excluded. 
The key characteristic of this two-step technique is related to its flexibility, i.e., the fair metric obtained in the first step can be used with any other individual fairness algorithms in the second step. Furthermore, we developed a second metric (distinct from the fair similarity metric) to determine how fairly a model is treating similar individuals. We use this metric to compare a "fair" model against its baseline model in terms of their individual fairness value. Finally, some experimental results corresponding to the individual unfairness mitigation techniques are presented. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.06106v1-abstract-full').style.display = 'none'; document.getElementById('2211.06106v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.10095">arXiv:2208.10095</a> <span> [<a href="https://arxiv.org/pdf/2208.10095">pdf</a>, <a href="https://arxiv.org/format/2208.10095">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Socially Fair Center-based and Linear Subspace Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gorantla%2C+S">Sruthi Gorantla</a>, <a href="/search/cs?searchtype=author&query=Gowda%2C+K+N">Kishen N. Gowda</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Amit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Louis%2C+A">Anand Louis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.10095v1-abstract-short" style="display: inline;"> Center-based clustering (e.g., $k$-means, $k$-medians) and clustering using linear subspaces are two most popular techniques to partition real-world data into smaller clusters. However, when the data consists of sensitive demographic groups, significantly different clustering cost per point for different sensitive groups can lead to fairness-related harms (e.g., different quality-of-service). 
The… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.10095v1-abstract-full').style.display = 'inline'; document.getElementById('2208.10095v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.10095v1-abstract-full" style="display: none;"> Center-based clustering (e.g., $k$-means, $k$-medians) and clustering using linear subspaces are two most popular techniques to partition real-world data into smaller clusters. However, when the data consists of sensitive demographic groups, significantly different clustering cost per point for different sensitive groups can lead to fairness-related harms (e.g., different quality-of-service). The goal of socially fair clustering is to minimize the maximum cost of clustering per point over all groups. In this work, we propose a unified framework to solve socially fair center-based clustering and linear subspace clustering, and give practical, efficient approximation algorithms for these problems. We do extensive experiments to show that on multiple benchmark datasets our algorithms either closely match or outperform state-of-the-art baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.10095v1-abstract-full').style.display = 'none'; document.getElementById('2208.10095v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.14036">arXiv:2205.14036</a> <span> [<a href="https://arxiv.org/pdf/2205.14036">pdf</a>, <a href="https://arxiv.org/format/2205.14036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> StereoKG: Data-Driven Knowledge Graph Construction for Cultural Knowledge and Stereotypes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A">Awantee Deshpande</a>, <a href="/search/cs?searchtype=author&query=Ruiter%2C+D">Dana Ruiter</a>, <a href="/search/cs?searchtype=author&query=Mosbach%2C+M">Marius Mosbach</a>, <a href="/search/cs?searchtype=author&query=Klakow%2C+D">Dietrich Klakow</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.14036v1-abstract-short" style="display: inline;"> Analyzing ethnic or religious bias is important for improving fairness, accountability, and transparency of natural language processing models. However, many techniques rely on human-compiled lists of bias terms, which are expensive to create and are limited in coverage. 
In this study, we present a fully data-driven pipeline for generating a knowledge graph (KG) of cultural knowledge and stereotyp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.14036v1-abstract-full').style.display = 'inline'; document.getElementById('2205.14036v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.14036v1-abstract-full" style="display: none;"> Analyzing ethnic or religious bias is important for improving fairness, accountability, and transparency of natural language processing models. However, many techniques rely on human-compiled lists of bias terms, which are expensive to create and are limited in coverage. In this study, we present a fully data-driven pipeline for generating a knowledge graph (KG) of cultural knowledge and stereotypes. Our resulting KG covers 5 religious groups and 5 nationalities and can easily be extended to include more entities. Our human evaluation shows that the majority (59.2%) of non-singleton entries are coherent and complete stereotypes. We further show that performing intermediate masked language model training on the verbalized KG leads to a higher level of cultural awareness in the model and has the potential to increase classification performance on knowledge-crucial samples on a related task, i.e., hate speech detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.14036v1-abstract-full').style.display = 'none'; document.getElementById('2205.14036v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 2 figures, accepted as a long paper at WOAH at NAACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.11843">arXiv:2205.11843</a> <span> [<a href="https://arxiv.org/pdf/2205.11843">pdf</a>, <a href="https://arxiv.org/format/2205.11843">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Beam Aware Stochastic Multihop Routing for Flying Ad-hoc Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+A">Anay Ajit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Pereira%2C+R">Roberto Pereira</a>, <a href="/search/cs?searchtype=author&query=Chiariotti%2C+F">Federico Chiariotti</a>, <a href="/search/cs?searchtype=author&query=Pastore%2C+A">Adriano Pastore</a>, <a href="/search/cs?searchtype=author&query=Mestre%2C+X">Xavier Mestre</a>, <a href="/search/cs?searchtype=author&query=Zanella%2C+A">Andrea Zanella</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.11843v1-abstract-short" style="display: inline;"> Routing is a crucial component in the design of Flying Ad-Hoc Networks (FANETs). 
State of the art routing solutions exploit the position of Unmanned Aerial Vehicles (UAVs) and their mobility information to determine the existence of links between them, but this information is often unreliable, as the topology of FANETs can change quickly and unpredictably. In order to improve the tracking performa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.11843v1-abstract-full').style.display = 'inline'; document.getElementById('2205.11843v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.11843v1-abstract-full" style="display: none;"> Routing is a crucial component in the design of Flying Ad-Hoc Networks (FANETs). State of the art routing solutions exploit the position of Unmanned Aerial Vehicles (UAVs) and their mobility information to determine the existence of links between them, but this information is often unreliable, as the topology of FANETs can change quickly and unpredictably. In order to improve the tracking performance, the uncertainty introduced by imperfect measurements and tracking algorithms needs to be accounted for in the routing. Another important element to consider is beamforming, which can reduce interference, but requires accurate channel and position information to work. In this work, we present the Beam Aware Stochastic Multihop Routing for FANETs (BA-SMURF), a Software-Defined Networking (SDN) routing scheme that takes into account the positioning uncertainty and beamforming design to find the most reliable routes in a FANET. Our simulation results show that joint consideration of the beamforming and routing can provide a 5% throughput improvement with respect to the state of the art. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.11843v1-abstract-full').style.display = 'none'; document.getElementById('2205.11843v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.11373">arXiv:2205.11373</a> <span> [<a href="https://arxiv.org/pdf/2205.11373">pdf</a>, <a href="https://arxiv.org/ps/2205.11373">ps</a>, <a href="https://arxiv.org/format/2205.11373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> User Clustering for Rate Splitting using Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pereira%2C+R">Roberto Pereira</a>, <a href="/search/cs?searchtype=author&query=Deshpande%2C+A+A">Anay Ajit Deshpande</a>, <a href="/search/cs?searchtype=author&query=Vaca-Rubio%2C+C+J">Cristian J. 
Vaca-Rubio</a>, <a href="/search/cs?searchtype=author&query=Mestre%2C+X">Xavier Mestre</a>, <a href="/search/cs?searchtype=author&query=Zanella%2C+A">Andrea Zanella</a>, <a href="/search/cs?searchtype=author&query=Gregoratti%2C+D">David Gregoratti</a>, <a href="/search/cs?searchtype=author&query=de+Carvalho%2C+E">Elisabeth de Carvalho</a>, <a href="/search/cs?searchtype=author&query=Popovski%2C+P">Petar Popovski</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.11373v1-abstract-short" style="display: inline;"> Hierarchical Rate Splitting (HRS) schemes proposed in recent years have shown to provide significant improvements in exploiting spatial diversity in wireless networks and provide high throughput for all users while minimising interference among them. Hence, one of the major challenges for such HRS schemes is the necessity to know the optimal clustering of these users based only on their Channel St… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.11373v1-abstract-full').style.display = 'inline'; document.getElementById('2205.11373v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.11373v1-abstract-full" style="display: none;"> Hierarchical Rate Splitting (HRS) schemes proposed in recent years have shown to provide significant improvements in exploiting spatial diversity in wireless networks and provide high throughput for all users while minimising interference among them. Hence, one of the major challenges for such HRS schemes is the necessity to know the optimal clustering of these users based only on their Channel State Information (CSI). 
This clustering problem is known to be NP hard and, to deal with the unmanageable complexity of finding an optimal solution, in this work a scalable and much lighter clustering mechanism based on Neural Network (NN) is proposed. The accuracy and performance metrics show that the NN is able to learn and cluster the users based on the noisy channel response and is able to achieve a rate comparable to other more complex clustering schemes from the literature. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.11373v1-abstract-full').style.display = 'none'; document.getElementById('2205.11373v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Deshpande%2C+A&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 
released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 
21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository