<!-- Search | arXiv e-print repository -->

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!-- Favicons and platform-specific icon metadata. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">

  <!--
    MathJax 2 configuration block; it is placed before the MathJax.js loader below.
    ignoreClass '.*' together with processClass 'mathjax.*' limits typesetting to
    elements whose class matches "mathjax". Inline math accepts both $...$ and
    \(...\); display math accepts $$...$$ and \[...\].
  -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https scheme, matching every other static.arxiv.org asset on the page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>

  <!-- Search-application styles and scripts; jQuery is pinned with a Subresource Integrity hash. -->
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <!-- NOTE(review): "radio" is not an HTML element name, so this selector looks like it can never match; presumably an input was intended. Confirm before changing. -->
  <style> radio#cf-customfield_11400 { display: none; } </style>
</head>
<body>
  <header>
    <a href="#main-container" class="is-sr-only">Skip to main content</a>

    <!-- Attribution bar: Cornell logo and funding acknowledgement. -->
    <div class="attribution level is-marginless" role="banner">
      <div class="level-left">
        <!-- No aria-label on the image: the alt text supplies the accessible name of this link. -->
        <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
      </div>
      <div class="level-right is-marginless">
        <p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p>
      </div>
    </div>

    <!-- Identity bar: arXiv logo and the compact site-wide search form. -->
    <div class="identity level is-marginless">
      <div class="level-left">
        <div class="level-item">
          <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
            <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
          </a>
        </div>
      </div>
      <div class="search-block level-right">
        <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
          <div class="field has-addons">
            <div class="control">
              <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
              <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
            </div>
            <div class="control">
              <div class="select is-small">
                <select name="searchtype" aria-label="Field to search">
                  <option value="all" selected>All fields</option>
                  <option value="title">Title</option>
                  <option value="author">Author</option>
                  <option value="abstract">Abstract</option>
                  <option value="comments">Comments</option>
                  <option value="journal_ref">Journal reference</option>
                  <option value="acm_class">ACM classification</option>
                  <option value="msc_class">MSC classification</option>
                  <option value="report_num">Report number</option>
                  <option value="paper_id">arXiv identifier</option>
                  <option value="doi">DOI</option>
                  <option value="orcid">ORCID</option>
                  <option value="author_id">arXiv author ID</option>
                  <option value="help">Help pages</option>
                  <option value="full_text">Full text</option>
                </select>
              </div>
            </div>
            <!-- Hidden field submitted with the query; its value marks the header form as the origin. -->
            <input type="hidden" name="source" value="header">
            <button type="submit" class="button is-small is-cul-darker">Search</button>
          </div>
        </form>
      </div>
    </div>

    <div class="container">
      <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
        <a href="https://arxiv.org/login">Login</a>
      </div>
    </div>
  </header>
  <!-- Primary content; the id is the target of the skip link at the top of the header. -->
  <main class="container" id="main-container">
    <div class="level is-marginless">
      <div class="level-left">
        <h1 class="title is-clearfix"> Showing 1–49 of 49 results for author: <span class="mathjax">Parmar, M</span> </h1>
      </div>
      <div class="level-right is-hidden-mobile">
        <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span>
      </div>
    </div>
    <div class="content">
      <!-- role="search" exposes this form as a search landmark. -->
      <form method="GET" action="/search/cs" role="search">
        Searching in archive <strong>cs</strong>.
<a href="/search/?searchtype=author&query=Parmar%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Parmar, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Parmar%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Parmar, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05675">arXiv:2502.05675</a> <span> [<a href="https://arxiv.org/pdf/2502.05675">pdf</a>, <a href="https://arxiv.org/format/2502.05675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Investigating the Shortcomings of LLMs in Step-by-Step Legal Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mishra%2C+V">Venkatesh Mishra</a>, <a href="/search/cs?searchtype=author&query=Pathiraja%2C+B">Bimsara Pathiraja</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Chidananda%2C+S">Sat Chidananda</a>, <a href="/search/cs?searchtype=author&query=Srinivasa%2C+J">Jayanth Srinivasa</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gaowen Liu</a>, <a href="/search/cs?searchtype=author&query=Payani%2C+A">Ali Payani</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2502.05675v1-abstract-short" style="display: inline;"> Reasoning abilities of LLMs have been a key focus in recent years. One challenging reasoning domain with interesting nuances is legal reasoning, which requires careful application of rules, and precedents while balancing deductive and analogical reasoning, and conflicts between rules. Although there have been a few works on using LLMs for legal reasoning, their focus has been on overall accuracy.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05675v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05675v1-abstract-full" style="display: none;"> Reasoning abilities of LLMs have been a key focus in recent years. One challenging reasoning domain with interesting nuances is legal reasoning, which requires careful application of rules, and precedents while balancing deductive and analogical reasoning, and conflicts between rules. Although there have been a few works on using LLMs for legal reasoning, their focus has been on overall accuracy. In this paper, we dig deeper to do a step-by-step analysis and figure out where they commit errors. We use the college-level Multiple Choice Question-Answering (MCQA) task from the \textit{Civil Procedure} dataset and propose a new error taxonomy derived from initial manual analysis of reasoning chains with respect to several LLMs, including two objective measures: soundness and correctness scores. We then develop an LLM-based automated evaluation framework to identify reasoning errors and evaluate the performance of LLMs. The computation of soundness and correctness on the dataset using the auto-evaluator framework reveals several interesting insights. 
Furthermore, we show that incorporating the error taxonomy as feedback in popular prompting techniques marginally increases LLM performance. Our work will also serve as an evaluation framework that can be used in detailed error analysis of reasoning chains for logic-intensive complex tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05675v1-abstract-full').style.display = 'none'; document.getElementById('2502.05675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2025 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17030">arXiv:2501.17030</a> <span> [<a href="https://arxiv.org/pdf/2501.17030">pdf</a>, <a href="https://arxiv.org/ps/2501.17030">ps</a>, <a href="https://arxiv.org/format/2501.17030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Challenges in Ensuring AI Safety in DeepSeek-R1 Models: The Shortcomings of Reinforcement Learning Strategies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a>, <a href="/search/cs?searchtype=author&query=Govindarajulu%2C+Y">Yuvaraj Govindarajulu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17030v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved remarkable progress in reasoning, alignment, and task-specific performance. However, ensuring harmlessness in these systems remains a critical challenge, particularly in advanced models like DeepSeek-R1. This paper examines the limitations of Reinforcement Learning (RL) as the primary approach for reducing harmful outputs in DeepSeek-R1 and compares it wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17030v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17030v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17030v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved remarkable progress in reasoning, alignment, and task-specific performance. However, ensuring harmlessness in these systems remains a critical challenge, particularly in advanced models like DeepSeek-R1. This paper examines the limitations of Reinforcement Learning (RL) as the primary approach for reducing harmful outputs in DeepSeek-R1 and compares it with Supervised Fine-Tuning (SFT). While RL improves reasoning capabilities, it faces challenges such as reward hacking, generalization failures, language mixing, and high computational costs. We propose hybrid training approaches combining RL and SFT to achieve robust harmlessness reduction. Usage recommendations and future directions for deploying DeepSeek-R1 responsibly are also presented. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17030v1-abstract-full').style.display = 'none'; document.getElementById('2501.17030v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20011">arXiv:2410.20011</a> <span> [<a href="https://arxiv.org/pdf/2410.20011">pdf</a>, <a href="https://arxiv.org/format/2410.20011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Small Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Van+Nguyen%2C+C">Chien Van Nguyen</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+X">Xuan Shen</a>, <a href="/search/cs?searchtype=author&query=Aponte%2C+R">Ryan Aponte</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yu Xia</a>, <a href="/search/cs?searchtype=author&query=Basu%2C+S">Samyadeep Basu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhengmian Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Kunapuli%2C+S">Sasidhar Kunapuli</a>, <a href="/search/cs?searchtype=author&query=Barrow%2C+J">Joe Barrow</a>, <a 
href="/search/cs?searchtype=author&query=Wu%2C+J">Junda Wu</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+A">Ashish Singh</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+J">Jiuxiang Gu</a>, <a href="/search/cs?searchtype=author&query=Dernoncourt%2C+F">Franck Dernoncourt</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+N+K">Nesreen K. Ahmed</a>, <a href="/search/cs?searchtype=author&query=Lipka%2C+N">Nedim Lipka</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruiyi Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+T">Tong Yu</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Sungchul Kim</a>, <a href="/search/cs?searchtype=author&query=Deilamsalehy%2C+H">Hanieh Deilamsalehy</a>, <a href="/search/cs?searchtype=author&query=Park%2C+N">Namyong Park</a>, <a href="/search/cs?searchtype=author&query=Rimer%2C+M">Mike Rimer</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhehao Zhang</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20011v1-abstract-short" style="display: inline;"> Small Language Models (SLMs) have become increasingly important due to their efficiency and performance to perform various language tasks with minimal computational resources, making them ideal for various settings including on-device, mobile, edge devices, among many others. 
In this article, we present a comprehensive survey on SLMs, focusing on their architectures, training techniques, and model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20011v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20011v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20011v1-abstract-full" style="display: none;"> Small Language Models (SLMs) have become increasingly important due to their efficiency and performance to perform various language tasks with minimal computational resources, making them ideal for various settings including on-device, mobile, edge devices, among many others. In this article, we present a comprehensive survey on SLMs, focusing on their architectures, training techniques, and model compression techniques. We propose a novel taxonomy for categorizing the methods used to optimize SLMs, including model compression, pruning, and quantization techniques. We summarize the benchmark datasets that are useful for benchmarking SLMs along with the evaluation metrics commonly used. Additionally, we highlight key open challenges that remain to be addressed. Our survey aims to serve as a valuable resource for researchers and practitioners interested in developing and deploying small yet efficient language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20011v1-abstract-full').style.display = 'none'; document.getElementById('2410.20011v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14702">arXiv:2410.14702</a> <span> [<a href="https://arxiv.org/pdf/2410.14702">pdf</a>, <a href="https://arxiv.org/format/2410.14702">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Polymath: A Challenging Multi-modal Mathematical Reasoning Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+H">Himanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Verma%2C+S">Shreyas Verma</a>, <a href="/search/cs?searchtype=author&query=Anantheswaran%2C+U">Ujjwala Anantheswaran</a>, <a href="/search/cs?searchtype=author&query=Scaria%2C+K">Kevin Scaria</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14702v1-abstract-short" style="display: inline;"> Multi-modal Large Language Models (MLLMs) exhibit impressive problem-solving abilities in various domains, but their visual comprehension and abstract reasoning skills remain under-evaluated. To this end, we present PolyMATH, a challenging benchmark aimed at evaluating the general cognitive reasoning abilities of MLLMs. 
PolyMATH comprises 5,000 manually collected high-quality images of cognitive t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14702v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14702v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14702v1-abstract-full" style="display: none;"> Multi-modal Large Language Models (MLLMs) exhibit impressive problem-solving abilities in various domains, but their visual comprehension and abstract reasoning skills remain under-evaluated. To this end, we present PolyMATH, a challenging benchmark aimed at evaluating the general cognitive reasoning abilities of MLLMs. PolyMATH comprises 5,000 manually collected high-quality images of cognitive textual and visual challenges across 10 distinct categories, including pattern recognition, spatial reasoning, and relative reasoning. We conducted a comprehensive, and quantitative evaluation of 15 MLLMs using four diverse prompting strategies, including Chain-of-Thought and Step-Back. The best scores achieved on PolyMATH are ~41%, ~36%, and ~27%, obtained by Claude-3.5 Sonnet, GPT-4o and Gemini-1.5 Pro respectively - highlighting the logical and visual complexity of these questions. A further fine-grained error analysis reveals that these models struggle to understand spatial relations and perform drawn-out, high-level reasoning. This is further strengthened by our ablation study estimating MLLM performance when given textual descriptions in place of diagrams. As evidenced by ~4% improvement over textual descriptions as opposed to actual images, we discover that models do not truly comprehend visual diagrams and the spatial information therein, and are thus prone to logical errors. Finally, we evaluate the OpenAI o1 models and find that their performance only matches the human baseline, highlighting the difficulty of the benchmark. 
The results on PolyMATH highlight the room for improvement in multi-modal reasoning and provide unique insights to guide the development of future MLLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14702v1-abstract-full').style.display = 'none'; document.getElementById('2410.14702v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">49 pages, (10 pages paper, 9 pages references, 30 pages appendix)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02140">arXiv:2408.02140</a> <span> [<a href="https://arxiv.org/pdf/2408.02140">pdf</a>, <a href="https://arxiv.org/format/2408.02140">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> VidModEx: Interpretable and Efficient Black Box Model Extraction for High-Dimensional Spaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kumar%2C+S+S">Somnath Sendhil Kumar</a>, <a href="/search/cs?searchtype=author&query=Govindarajulu%2C+Y">Yuvaraj Govindarajulu</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+P">Pavan Kulkarni</a>, <a 
href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02140v1-abstract-short" style="display: inline;"> In the domain of black-box model extraction, conventional methods reliant on soft labels or surrogate datasets struggle with scaling to high-dimensional input spaces and managing the complexity of an extensive array of interrelated classes. In this work, we present a novel approach that utilizes SHAP (SHapley Additive exPlanations) to enhance synthetic data generation. SHAP quantifies the individu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02140v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02140v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02140v1-abstract-full" style="display: none;"> In the domain of black-box model extraction, conventional methods reliant on soft labels or surrogate datasets struggle with scaling to high-dimensional input spaces and managing the complexity of an extensive array of interrelated classes. In this work, we present a novel approach that utilizes SHAP (SHapley Additive exPlanations) to enhance synthetic data generation. SHAP quantifies the individual contributions of each input feature towards the victim model's output, facilitating the optimization of an energy-based GAN towards a desirable output. This method significantly boosts performance, achieving a 16.45% increase in the accuracy of image classification models and extending to video classification models with an average improvement of 26.11% and a maximum of 33.36% on challenging datasets such as UCF11, UCF101, Kinetics 400, Kinetics 600, and Something-Something V2. 
We further demonstrate the effectiveness and practical utility of our method under various scenarios, including the availability of top-k prediction probabilities, top-k prediction labels, and top-1 labels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02140v1-abstract-full').style.display = 'none'; document.getElementById('2408.02140v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14790">arXiv:2407.14790</a> <span> [<a href="https://arxiv.org/pdf/2407.14790">pdf</a>, <a href="https://arxiv.org/format/2407.14790">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Step-by-Step Reasoning to Solve Grid Puzzles: Where do LLMs Falter? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tyagi%2C+N">Nemika Tyagi</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+M">Mohith Kulkarni</a>, <a href="/search/cs?searchtype=author&query=RRV%2C+A">Aswin RRV</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+N">Nisarg Patel</a>, <a href="/search/cs?searchtype=author&query=Nakamura%2C+M">Mutsumi Nakamura</a>, <a href="/search/cs?searchtype=author&query=Mitra%2C+A">Arindam Mitra</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14790v2-abstract-short" style="display: inline;"> Solving grid puzzles involves a significant amount of logical reasoning. Hence, it is a good domain to evaluate the reasoning capability of a model which can then guide us to improve the reasoning ability of models. However, most existing works evaluate only the final predicted answer of a puzzle, without delving into an in-depth analysis of the LLMs' reasoning chains (such as where they falter) o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14790v2-abstract-full').style.display = 'inline'; document.getElementById('2407.14790v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14790v2-abstract-full" style="display: none;"> Solving grid puzzles involves a significant amount of logical reasoning. Hence, it is a good domain to evaluate the reasoning capability of a model which can then guide us to improve the reasoning ability of models. 
However, most existing works evaluate only the final predicted answer of a puzzle, without delving into an in-depth analysis of the LLMs' reasoning chains (such as where they falter) or providing any finer metrics to evaluate them. Since LLMs may rely on simple heuristics or artifacts to predict the final answer, it is crucial to evaluate the generated reasoning chain beyond overall correctness measures, for accurately evaluating the reasoning abilities of LLMs. To this end, we first develop GridPuzzle, an evaluation dataset comprising 274 grid-based puzzles with different complexities. Second, we propose a new error taxonomy derived from manual analysis of reasoning chains from LLMs including GPT-4, Claude-3, Gemini, Mistral, and Llama-2. Then, we develop an LLM-based framework for large-scale subjective evaluation (i.e., identifying errors) and an objective metric, PuzzleEval, to evaluate the correctness of reasoning chains. Evaluating reasoning chains from LLMs leads to several interesting findings. We further show that existing prompting methods used for enhancing models' reasoning abilities do not improve performance on GridPuzzle. This highlights the importance of understanding fine-grained errors and presents a challenge for future research to enhance LLMs' puzzle-solving abilities by developing methods that address these errors. Data and source code are available at https://github.com/Mihir3009/GridPuzzle. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14790v2-abstract-full').style.display = 'none'; document.getElementById('2407.14790v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2024 Main</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11599">arXiv:2407.11599</a> <span> [<a href="https://arxiv.org/pdf/2407.11599">pdf</a>, <a href="https://arxiv.org/format/2407.11599">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing TinyML Security: Study of Adversarial Attack Transferability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shah%2C+P">Parin Shah</a>, <a href="/search/cs?searchtype=author&query=Govindarajulu%2C+Y">Yuvaraj Govindarajulu</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+P">Pavan Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11599v2-abstract-short" style="display: inline;"> The recent strides in artificial intelligence (AI) and machine learning (ML) have propelled the rise of TinyML, a paradigm enabling AI computations at the edge without dependence on cloud connections. While TinyML offers real-time data analysis and swift responses critical for diverse applications, its devices' intrinsic resource limitations expose them to security risks. 
This research delves into… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11599v2-abstract-full').style.display = 'inline'; document.getElementById('2407.11599v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11599v2-abstract-full" style="display: none;"> The recent strides in artificial intelligence (AI) and machine learning (ML) have propelled the rise of TinyML, a paradigm enabling AI computations at the edge without dependence on cloud connections. While TinyML offers real-time data analysis and swift responses critical for diverse applications, its devices' intrinsic resource limitations expose them to security risks. This research delves into the adversarial vulnerabilities of AI models on resource-constrained embedded hardware, with a focus on Model Extraction and Evasion Attacks. Our findings reveal that adversarial attacks from powerful host machines could be transferred to smaller, less secure devices like ESP32 and Raspberry Pi. This illustrates that adversarial attacks could be extended to tiny devices, underscoring vulnerabilities, and emphasizing the necessity for reinforced security measures in TinyML deployments. This exploration enhances the comprehension of security challenges in TinyML and offers insights for safeguarding sensitive data and ensuring device dependability in AI-powered edge computing settings. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11599v2-abstract-full').style.display = 'none'; document.getElementById('2407.11599v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted and presented at tinyML Foundation EMEA Innovation Forum 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04855">arXiv:2407.04855</a> <span> [<a href="https://arxiv.org/pdf/2407.04855">pdf</a>, <a href="https://arxiv.org/format/2407.04855">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Enhancing Coherence in Extractive Summarization: Dataset and Experiments with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Deilamsalehy%2C+H">Hanieh Deilamsalehy</a>, <a href="/search/cs?searchtype=author&query=Dernoncourt%2C+F">Franck Dernoncourt</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+S">Seunghyun Yoon</a>, <a href="/search/cs?searchtype=author&query=Rossi%2C+R+A">Ryan A. 
Rossi</a>, <a href="/search/cs?searchtype=author&query=Bui%2C+T">Trung Bui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04855v1-abstract-short" style="display: inline;"> Extractive summarization plays a pivotal role in natural language processing due to its wide-range applications in summarizing diverse content efficiently, while also being faithful to the original content. Despite significant advancement achieved in extractive summarization by Large Language Models (LLMs), these summaries frequently exhibit incoherence. An important aspect of the coherent summary… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04855v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04855v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04855v1-abstract-full" style="display: none;"> Extractive summarization plays a pivotal role in natural language processing due to its wide-range applications in summarizing diverse content efficiently, while also being faithful to the original content. Despite significant advancement achieved in extractive summarization by Large Language Models (LLMs), these summaries frequently exhibit incoherence. An important aspect of the coherent summary is its readability for intended users. Although there have been many datasets and benchmarks proposed for creating coherent extractive summaries, none of them currently incorporate user intent to improve coherence in extractive summarization. Motivated by this, we propose a systematically created human-annotated dataset consisting of coherent summaries for five publicly available datasets and natural language user feedback, offering valuable insights into how to improve coherence in extractive summaries. 
We utilize this dataset for aligning LLMs through supervised fine-tuning with natural language human feedback to enhance the coherence of their generated summaries. Preliminary experiments with Falcon-40B and Llama-2-13B show significant performance improvements (~10% Rouge-L) in terms of producing coherent summaries. We further utilize human feedback to benchmark results over instruction-tuned models such as FLAN-T5 which resulted in several interesting findings. Data and source code are available at https://github.com/Mihir3009/Extract-AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04855v1-abstract-full').style.display = 'none'; document.getElementById('2407.04855v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17169">arXiv:2406.17169</a> <span> [<a href="https://arxiv.org/pdf/2406.17169">pdf</a>, <a href="https://arxiv.org/format/2406.17169">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-LogiEval: Towards Evaluating Multi-Step Logical Reasoning Ability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Patel%2C+N">Nisarg 
Patel</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+M">Mohith Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Budhiraja%2C+A">Aashna Budhiraja</a>, <a href="/search/cs?searchtype=author&query=Nakamura%2C+M">Mutsumi Nakamura</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17169v3-abstract-short" style="display: inline;"> As Large Language Models (LLMs) continue to exhibit remarkable performance in natural language understanding tasks, there is a crucial need to measure their ability for human-like multi-step logical reasoning. Existing logical reasoning evaluation benchmarks often focus primarily on simplistic single-step or multi-step reasoning with a limited set of inference rules. Furthermore, the lack of datas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17169v3-abstract-full').style.display = 'inline'; document.getElementById('2406.17169v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17169v3-abstract-full" style="display: none;"> As Large Language Models (LLMs) continue to exhibit remarkable performance in natural language understanding tasks, there is a crucial need to measure their ability for human-like multi-step logical reasoning. Existing logical reasoning evaluation benchmarks often focus primarily on simplistic single-step or multi-step reasoning with a limited set of inference rules. Furthermore, the lack of datasets for evaluating non-monotonic reasoning represents a crucial gap since it aligns more closely with human-like reasoning. 
To address these limitations, we propose Multi-LogiEval, a comprehensive evaluation dataset encompassing multi-step logical reasoning with various inference rules and depths. Multi-LogiEval covers three logic types--propositional, first-order, and non-monotonic--consisting of more than 30 inference rules and more than 60 of their combinations with various depths. Leveraging this dataset, we conduct evaluations on a range of LLMs including GPT-4, ChatGPT, Gemini-Pro, Yi, Orca, and Mistral, employing a zero-shot chain-of-thought. Experimental results show that there is a significant drop in the performance of LLMs as the reasoning steps/depth increases (average accuracy of ~68% at depth-1 to ~43% at depth-5). We further conduct a thorough investigation of reasoning chains generated by LLMs which reveals several important findings. We believe that Multi-LogiEval facilitates future research for evaluating and enhancing the logical reasoning ability of LLMs. Data is available at https://github.com/Mihir3009/Multi-LogiEval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17169v3-abstract-full').style.display = 'none'; document.getElementById('2406.17169v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2024 Main</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15656">arXiv:2404.15656</a> <span> [<a href="https://arxiv.org/pdf/2404.15656">pdf</a>, <a href="https://arxiv.org/format/2404.15656">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> MISLEAD: Manipulating Importance of Selected features for Learning Epsilon in Evasion Attack Deception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khazanchi%2C+V">Vidit Khazanchi</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+P">Pavan Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Govindarajulu%2C+Y">Yuvaraj Govindarajulu</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15656v2-abstract-short" style="display: inline;"> Emerging vulnerabilities in machine learning (ML) models due to adversarial attacks raise concerns about their reliability. Specifically, evasion attacks manipulate models by introducing precise perturbations to input data, causing erroneous predictions. 
To address this, we propose a methodology combining SHapley Additive exPlanations (SHAP) for feature importance analysis with an innovative Optim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15656v2-abstract-full').style.display = 'inline'; document.getElementById('2404.15656v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15656v2-abstract-full" style="display: none;"> Emerging vulnerabilities in machine learning (ML) models due to adversarial attacks raise concerns about their reliability. Specifically, evasion attacks manipulate models by introducing precise perturbations to input data, causing erroneous predictions. To address this, we propose a methodology combining SHapley Additive exPlanations (SHAP) for feature importance analysis with an innovative Optimal Epsilon technique for conducting evasion attacks. Our approach begins with SHAP-based analysis to understand model vulnerabilities, crucial for devising targeted evasion strategies. The Optimal Epsilon technique, employing a Binary Search algorithm, efficiently determines the minimum epsilon needed for successful evasion. Evaluation across diverse machine learning architectures demonstrates the technique's precision in generating adversarial samples, underscoring its efficacy in manipulating model outcomes. This study emphasizes the critical importance of continuous assessment and monitoring to identify and mitigate potential security risks in machine learning systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15656v2-abstract-full').style.display = 'none'; document.getElementById('2404.15656v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15522">arXiv:2404.15522</a> <span> [<a href="https://arxiv.org/pdf/2404.15522">pdf</a>, <a href="https://arxiv.org/format/2404.15522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LogicBench: Towards Systematic Evaluation of Logical Reasoning Ability of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+N">Nisarg Patel</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Nakamura%2C+M">Mutsumi Nakamura</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+M">Man Luo</a>, <a href="/search/cs?searchtype=author&query=Mashetty%2C+S">Santosh Mashetty</a>, <a href="/search/cs?searchtype=author&query=Mitra%2C+A">Arindam Mitra</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15522v2-abstract-short" style="display: inline;"> Recently developed large language models (LLMs) have been shown to perform remarkably well on a wide range of language understanding tasks. But, can they really "reason" over the natural language? This question has been receiving significant research attention and many reasoning skills such as commonsense, numerical, and qualitative have been studied. However, the crucial skill pertaining to 'logi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15522v2-abstract-full').style.display = 'inline'; document.getElementById('2404.15522v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15522v2-abstract-full" style="display: none;"> Recently developed large language models (LLMs) have been shown to perform remarkably well on a wide range of language understanding tasks. But, can they really "reason" over the natural language? This question has been receiving significant research attention and many reasoning skills such as commonsense, numerical, and qualitative have been studied. However, the crucial skill pertaining to 'logical reasoning' has remained underexplored. Existing work investigating this reasoning ability of LLMs has focused only on a couple of inference rules (such as modus ponens and modus tollens) of propositional and first-order logic. Addressing the above limitation, we comprehensively evaluate the logical reasoning ability of LLMs on 25 different reasoning patterns spanning over propositional, first-order, and non-monotonic logics. To enable systematic evaluation, we introduce LogicBench, a natural language question-answering dataset focusing on the use of a single inference rule. 
We conduct detailed analysis with a range of LLMs such as GPT-4, ChatGPT, Gemini, Llama-2, and Mistral using chain-of-thought prompting. Experimental results show that existing LLMs do not fare well on LogicBench; especially, they struggle with instances involving complex reasoning and negations. Furthermore, they sometimes overlook contextual information necessary for reasoning to arrive at the correct conclusion. We believe that our work and findings facilitate future research for evaluating and enhancing the logical reasoning ability of LLMs. Data and code are available at https://github.com/Mihir3009/LogicBench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15522v2-abstract-full').style.display = 'none'; document.getElementById('2404.15522v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ACL(Main) 2024 | First version available @ https://openreview.net/forum?id=7NR2ZVzZxx</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.00388">arXiv:2401.00388</a> <span> [<a href="https://arxiv.org/pdf/2401.00388">pdf</a>, <a href="https://arxiv.org/format/2401.00388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> FusionMind -- Improving question and answering with external context fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Verma%2C+S">Shreyas Verma</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manoj Parmar</a>, <a href="/search/cs?searchtype=author&query=Choudhary%2C+P">Palash Choudhary</a>, <a href="/search/cs?searchtype=author&query=Porwal%2C+S">Sanchita Porwal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.00388v1-abstract-short" style="display: inline;"> Answering questions using pre-trained language models (LMs) and knowledge graphs (KGs) presents challenges in identifying relevant knowledge and performing joint reasoning. We compared LMs (fine-tuned for the task) with the previously published QAGNN method for the Question-answering (QA) objective and further measured the impact of additional factual context on the QAGNN performance. 
The QAGNN met… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00388v1-abstract-full').style.display = 'inline'; document.getElementById('2401.00388v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.00388v1-abstract-full" style="display: none;"> Answering questions using pre-trained language models (LMs) and knowledge graphs (KGs) presents challenges in identifying relevant knowledge and performing joint reasoning. We compared LMs (fine-tuned for the task) with the previously published QAGNN method for the Question-answering (QA) objective and further measured the impact of additional factual context on the QAGNN performance. The QAGNN method employs LMs to encode QA context and estimate KG node importance, and effectively update the question choice entity representations using Graph Neural Networks (GNNs). We further experimented with enhancing the QA context encoding by incorporating relevant knowledge facts for the question stem. The models are trained on the OpenbookQA dataset, which contains ~6000 4-way multiple choice questions and is widely used as a benchmark for QA tasks. Through our experimentation, we found that incorporating knowledge facts context led to a significant improvement in performance. In contrast, the addition of knowledge graphs to language models resulted in only a modest increase. This suggests that the integration of contextual knowledge facts may be more impactful for enhancing question answering performance compared to solely adding knowledge graphs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00388v1-abstract-full').style.display = 'none'; document.getElementById('2401.00388v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 4 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.14423">arXiv:2312.14423</a> <span> [<a href="https://arxiv.org/pdf/2312.14423">pdf</a>, <a href="https://arxiv.org/format/2312.14423">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Efficacy of Machine-Generated Instructions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gulati%2C+S">Samaksh Gulati</a>, <a href="/search/cs?searchtype=author&query=Verma%2C+A">Anshit Verma</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manoj Parmar</a>, <a href="/search/cs?searchtype=author&query=Chaudhary%2C+P">Palash Chaudhary</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.14423v1-abstract-short" style="display: inline;"> Large "instruction-tuned" language models (i.e., finetuned to respond to instructions) have demonstrated a remarkable ability to generalize zero-shot to new tasks. 
Nevertheless, they depend heavily on human-written instruction data that is often limited in quantity, diversity, and creativity, therefore hindering the generality of the tuned model. We conducted a quantitative study to figure out the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14423v1-abstract-full').style.display = 'inline'; document.getElementById('2312.14423v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.14423v1-abstract-full" style="display: none;"> Large "instruction-tuned" language models (i.e., finetuned to respond to instructions) have demonstrated a remarkable ability to generalize zero-shot to new tasks. Nevertheless, they depend heavily on human-written instruction data that is often limited in quantity, diversity, and creativity, therefore hindering the generality of the tuned model. We conducted a quantitative study to figure out the efficacy of machine-generated annotations, where we compare the results of a fine-tuned BERT model with human vs. machine-generated annotations. Applying our methods to the vanilla GPT-3 model, we saw that machine-generated annotations were 78.54% correct and the fine-tuned model achieved a 96.01% model performance compared to the performance with human-labelled annotations. This result shows that machine-generated annotations are a resource- and cost-effective way to fine-tune downstream models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14423v1-abstract-full').style.display = 'none'; document.getElementById('2312.14423v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 2 pages references, 6 Tables, 8 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.06979">arXiv:2312.06979</a> <span> [<a href="https://arxiv.org/pdf/2312.06979">pdf</a>, <a href="https://arxiv.org/ps/2312.06979">ps</a>, <a href="https://arxiv.org/format/2312.06979">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the notion of Hallucinations from the lens of Bias and Validity in Synthetic CXR Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhardwaj%2C+G">Gauri Bhardwaj</a>, <a href="/search/cs?searchtype=author&query=Govindarajulu%2C+Y">Yuvaraj Govindarajulu</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+S">Sundaraparipurnan Narayanan</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+P">Pavan Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.06979v1-abstract-short" style="display: inline;"> Medical imaging has revolutionized disease diagnosis, yet the potential is hampered by limited access to diverse and privacy-conscious datasets. 
Open-source medical datasets, while valuable, suffer from data quality and clinical information disparities. Generative models, such as diffusion models, aim to mitigate these challenges. At Stanford, researchers explored the utility of a fine-tuned Stabl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06979v1-abstract-full').style.display = 'inline'; document.getElementById('2312.06979v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.06979v1-abstract-full" style="display: none;"> Medical imaging has revolutionized disease diagnosis, yet the potential is hampered by limited access to diverse and privacy-conscious datasets. Open-source medical datasets, while valuable, suffer from data quality and clinical information disparities. Generative models, such as diffusion models, aim to mitigate these challenges. At Stanford, researchers explored the utility of a fine-tuned Stable Diffusion model (RoentGen) for medical imaging data augmentation. Our work examines specific considerations to expand the Stanford research question, Could Stable Diffusion Solve a Gap in Medical Imaging Data? from the lens of bias and validity of the generated outcomes. We leveraged RoentGen to produce synthetic Chest-XRay (CXR) images and conducted assessments on bias, validity, and hallucinations. Diagnostic accuracy was evaluated by a disease classifier, while a COVID classifier uncovered latent hallucinations. The bias analysis unveiled disparities in classification performance among various subgroups, with a pronounced impact on the Female Hispanic subgroup. Furthermore, incorporating race and gender into input prompts exacerbated fairness issues in the generated images. The quality of synthetic images exhibited variability, particularly in certain disease classes, where there was more significant uncertainty compared to the original images. 
Additionally, we observed latent hallucinations, with approximately 42% of the images incorrectly indicating COVID, hinting at the presence of hallucinatory elements. These identifications provide new research directions towards interpretability of synthetic CXR images, for further understanding of associated risks and patient safety in medical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06979v1-abstract-full').style.display = 'none'; document.getElementById('2312.06979v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at 37th Conference on Neural Information Processing Systems (NeurIPS 2023) - "Medical Imaging Meets NeurIPS" Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.09564">arXiv:2311.09564</a> <span> [<a href="https://arxiv.org/pdf/2311.09564">pdf</a>, <a href="https://arxiv.org/format/2311.09564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LongBoX: Evaluating Transformers on Long-Sequence Clinical Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a 
href="/search/cs?searchtype=author&query=Gupta%2C+H">Himanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+D">Disha Agrawal</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.09564v1-abstract-short" style="display: inline;"> Many large language models (LLMs) for medicine have largely been evaluated on short texts, and their ability to handle longer sequences such as a complete electronic health record (EHR) has not been systematically explored. Assessing these models on long sequences is crucial since prior work in the general domain has demonstrated performance degradation of LLMs on longer texts. Motivated by this,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09564v1-abstract-full').style.display = 'inline'; document.getElementById('2311.09564v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.09564v1-abstract-full" style="display: none;"> Many large language models (LLMs) for medicine have largely been evaluated on short texts, and their ability to handle longer sequences such as a complete electronic health record (EHR) has not been systematically explored. Assessing these models on long sequences is crucial since prior work in the general domain has demonstrated performance degradation of LLMs on longer texts. Motivated by this, we introduce LongBoX, a collection of seven medical datasets in text-to-text format, designed to investigate model performance on long sequences. Preliminary experiments reveal that both medical LLMs (e.g., BioGPT) and strong general domain LLMs (e.g., FLAN-T5) struggle on this benchmark. 
We further evaluate two techniques designed for long-sequence handling: (i) local-global attention, and (ii) Fusion-in-Decoder (FiD). Our results demonstrate mixed results with long-sequence handling - while scores on some datasets increase, there is substantial room for improvement. We hope that LongBoX facilitates the development of more effective long-sequence techniques for the medical domain. Data and source code are available at https://github.com/Mihir3009/LongBoX. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09564v1-abstract-full').style.display = 'none'; document.getElementById('2311.09564v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18581">arXiv:2310.18581</a> <span> [<a href="https://arxiv.org/pdf/2310.18581">pdf</a>, <a href="https://arxiv.org/format/2310.18581">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Accelerating LLaMA Inference by Enabling Intermediate Layer Decoding via Instruction Tuning with LITE </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Chatterjee%2C+A">Agneet Chatterjee</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, 
<a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18581v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved remarkable performance across a wide variety of natural language tasks; however, their large size makes their inference slow and computationally expensive. Focusing on this problem, we propose to instruction tune LLMs with additional explicit losses from the intermediate layers (LITE) and show that it enables these layers to acquire 'good' generation abil… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18581v2-abstract-full').style.display = 'inline'; document.getElementById('2310.18581v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18581v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved remarkable performance across a wide variety of natural language tasks; however, their large size makes their inference slow and computationally expensive. Focusing on this problem, we propose to instruction tune LLMs with additional explicit losses from the intermediate layers (LITE) and show that it enables these layers to acquire 'good' generation ability without affecting the generation ability of the final layer. We perform 'dynamic confidence-based early exiting' at token level from the intermediate layers which improves the efficiency of text generation without compromising the quality of the generation. We conduct comprehensive experiments by instruction tuning LLaMA-2 models on the Alpaca dataset and holistically evaluate on four different human-instruction test sets. 
We show that dynamic early exiting achieves consistent and considerable inference computation cost improvements (37.86% for 7B and 46.35% for 13B model) while maintaining the generation quality of the responses. We further conduct a thorough analysis of the results over several important aspects, such as comparing the semantic similarity of the outputs and dissecting the efficiency improvements by comparing the number of tokens generated in the output. In summary, our work contributes to improving the efficiency of LLM inference while maintaining the generation quality, a crucial step en route to enabling their widespread adoption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18581v2-abstract-full').style.display = 'none'; document.getElementById('2310.18581v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.17876">arXiv:2310.17876</a> <span> [<a href="https://arxiv.org/pdf/2310.17876">pdf</a>, <a href="https://arxiv.org/format/2310.17876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TarGEN: Targeted Data Generation with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gupta%2C+H">Himanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Scaria%2C+K">Kevin Scaria</a>, <a href="/search/cs?searchtype=author&query=Anantheswaran%2C+U">Ujjwala Anantheswaran</a>, <a href="/search/cs?searchtype=author&query=Verma%2C+S">Shreyas Verma</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Sawant%2C+S+A">Saurabh Arjun Sawant</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.17876v3-abstract-short" style="display: inline;"> The rapid advancement of large language models (LLMs) has sparked interest in data synthesis techniques, aiming to generate diverse and high-quality synthetic datasets. However, these synthetic datasets often suffer from a lack of diversity and added noise. In this paper, we present TarGEN, a multi-step prompting strategy for generating high-quality synthetic datasets utilizing a LLM. 
An advantage… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17876v3-abstract-full').style.display = 'inline'; document.getElementById('2310.17876v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.17876v3-abstract-full" style="display: none;"> The rapid advancement of large language models (LLMs) has sparked interest in data synthesis techniques, aiming to generate diverse and high-quality synthetic datasets. However, these synthetic datasets often suffer from a lack of diversity and added noise. In this paper, we present TarGEN, a multi-step prompting strategy for generating high-quality synthetic datasets utilizing a LLM. An advantage of TarGEN is its seedless nature; it does not require specific task instances, broadening its applicability beyond task replication. We augment TarGEN with a method known as self-correction empowering LLMs to rectify inaccurately labeled instances during dataset creation, ensuring reliable labels. To assess our technique's effectiveness, we emulate 8 tasks from the SuperGLUE benchmark and finetune various language models, including encoder-only, encoder-decoder, and decoder-only models on both synthetic and original training sets. Evaluation on the original test set reveals that models trained on datasets generated by TarGEN perform approximately 1-2% points better than those trained on original datasets (82.84% via syn. vs. 81.12% on og. using Flan-T5). When incorporating instruction tuning, the performance increases to 84.54% on synthetic data vs. 81.49% on original data by Flan-T5. A comprehensive analysis of the synthetic dataset compared to the original dataset reveals that the synthetic dataset demonstrates similar or higher levels of dataset complexity and diversity. Furthermore, the synthetic dataset displays a bias level that aligns closely with the original dataset. 
Finally, when pre-finetuned on our synthetic SuperGLUE dataset, T5-3B yields impressive results on the OpenLLM leaderboard, surpassing the model trained on the Self-Instruct dataset by 4.14% points. We hope that TarGEN can be helpful for quality data generation and reducing the human efforts to create complex benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17876v3-abstract-full').style.display = 'none'; document.getElementById('2310.17876v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">COLM 2024, 35 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.00836">arXiv:2310.00836</a> <span> [<a href="https://arxiv.org/pdf/2310.00836">pdf</a>, <a href="https://arxiv.org/format/2310.00836">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards LogiGLUE: A Brief Survey and A Benchmark for Analyzing Logical Reasoning Capabilities of Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+M">Man Luo</a>, <a href="/search/cs?searchtype=author&query=Kumbhar%2C+S">Shrinidhi Kumbhar</a>, <a 
href="/search/cs?searchtype=author&query=shen%2C+M">Ming shen</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Banerjee%2C+P">Pratyay Banerjee</a>, <a href="/search/cs?searchtype=author&query=Aditya%2C+S">Somak Aditya</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.00836v3-abstract-short" style="display: inline;"> Logical reasoning is fundamental for humans yet presents a substantial challenge in the domain of Artificial Intelligence. Initially, researchers used Knowledge Representation and Reasoning (KR) systems that did not scale and required non-trivial manual effort. Recently, the emergence of large language models (LLMs) has demonstrated the ability to overcome various limitations of formal Knowledge R… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00836v3-abstract-full').style.display = 'inline'; document.getElementById('2310.00836v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.00836v3-abstract-full" style="display: none;"> Logical reasoning is fundamental for humans yet presents a substantial challenge in the domain of Artificial Intelligence. Initially, researchers used Knowledge Representation and Reasoning (KR) systems that did not scale and required non-trivial manual effort. Recently, the emergence of large language models (LLMs) has demonstrated the ability to overcome various limitations of formal Knowledge Representation (KR) systems. Consequently, there's a growing interest in using LLMs for logical reasoning via natural language. 
This work strives to understand the proficiency of LLMs in logical reasoning by offering a brief review of the latest progress in this area, with a focus on the logical reasoning datasets, tasks, and the methods adopted to utilize LLMs for reasoning. To offer a thorough analysis, we have compiled a benchmark titled LogiGLUE. This includes 24 varied datasets encompassing deductive, abductive, and inductive reasoning. Utilizing LogiGLUE as a foundation, we have trained an instruction fine-tuned language model, resulting in LogiT5. We study single-task training, multi-task training, and "chain-of-thought" knowledge distillation fine-tuning techniques to assess the performance of the model across the different logical reasoning categories. We also assess various LLMs using LogiGLUE, and the findings indicate that LLMs excel most in abductive reasoning, followed by deductive reasoning, while they are least effective at inductive reasoning. We aim to shed light on the capabilities and potential pathways for enhancing logical reasoning proficiency in LLMs, paving the way for more advanced and nuanced developments in this critical field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00836v3-abstract-full').style.display = 'none'; document.getElementById('2310.00836v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.04635">arXiv:2309.04635</a> <span> [<a href="https://arxiv.org/pdf/2309.04635">pdf</a>, <a href="https://arxiv.org/format/2309.04635">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Can NLP Models 'Identify', 'Distinguish', and 'Justify' Questions that Don't have a Definitive Answer? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Ayushi Agarwal</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+N">Nisarg Patel</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Mallina%2C+P">Pavan Mallina</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+A+B">Aryan Bhavin Shah</a>, <a href="/search/cs?searchtype=author&query=Sangaraju%2C+S+R">Srihari Raju Sangaraju</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+T">Tirth Patel</a>, <a href="/search/cs?searchtype=author&query=Thakkar%2C+N">Nihar Thakkar</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.04635v1-abstract-short" style="display: inline;"> Though state-of-the-art (SOTA) NLP systems have achieved remarkable performance on a variety of language understanding tasks, they primarily focus on questions that 
have a correct and a definitive answer. However, in real-world applications, users often ask questions that don't have a definitive answer. Incorrectly answering such questions certainly hampers a system's reliability and trustworthine… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04635v1-abstract-full').style.display = 'inline'; document.getElementById('2309.04635v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.04635v1-abstract-full" style="display: none;"> Though state-of-the-art (SOTA) NLP systems have achieved remarkable performance on a variety of language understanding tasks, they primarily focus on questions that have a correct and a definitive answer. However, in real-world applications, users often ask questions that don't have a definitive answer. Incorrectly answering such questions certainly hampers a system's reliability and trustworthiness. Can SOTA models accurately identify such questions and provide a reasonable response? To investigate the above question, we introduce QnotA, a dataset consisting of five different categories of questions that don't have definitive answers. Furthermore, for each QnotA instance, we also provide a corresponding QA instance, i.e., an alternate question that "can be" answered. With this data, we formulate three evaluation tasks that test a system's ability to 'identify', 'distinguish', and 'justify' QnotA questions. Through comprehensive experiments, we show that even SOTA models including GPT-3 and Flan T5 do not fare well on these tasks and lag considerably behind the human performance baseline. We conduct a thorough analysis which further leads to several interesting findings. Overall, we believe our work and findings will encourage and facilitate further research in this important area and help develop more robust models.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.04635v1-abstract-full').style.display = 'none'; document.getElementById('2309.04635v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">TrustNLP Workshop at ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.08147">arXiv:2308.08147</a> <span> [<a href="https://arxiv.org/pdf/2308.08147">pdf</a>, <a href="https://arxiv.org/format/2308.08147">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MDDial: A Multi-turn Differential Diagnosis Dialogue Dataset with Reliability Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Macherla%2C+S">Srija Macherla</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+M">Man Luo</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.08147v1-abstract-short" style="display: inline;"> Dialogue systems for Automatic Differential Diagnosis (ADD) have a wide range of real-life applications. 
These dialogue systems are promising for providing easy access and reducing medical costs. Building end-to-end ADD dialogue systems requires dialogue training datasets. However, to the best of our knowledge, there is no publicly available ADD dialogue dataset in English (although non-English da… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08147v1-abstract-full').style.display = 'inline'; document.getElementById('2308.08147v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.08147v1-abstract-full" style="display: none;"> Dialogue systems for Automatic Differential Diagnosis (ADD) have a wide range of real-life applications. These dialogue systems are promising for providing easy access and reducing medical costs. Building end-to-end ADD dialogue systems requires dialogue training datasets. However, to the best of our knowledge, there is no publicly available ADD dialogue dataset in English (although non-English datasets exist). Driven by this, we introduce MDDial, the first differential diagnosis dialogue dataset in English which can aid in building and evaluating end-to-end ADD dialogue systems. Additionally, earlier studies present the accuracy of diagnosis and symptoms either individually or as a combined weighted score. This method overlooks the connection between the symptoms and the diagnosis. We introduce a unified score for the ADD system that takes into account the interplay between symptoms and diagnosis. This score also indicates the system's reliability. To this end, we train two moderate-size language models on MDDial. Our experiments suggest that while these language models can perform well on many natural language understanding tasks, including dialogue tasks in the general domain, they struggle to relate relevant symptoms and disease and thus have poor performance on MDDial.
MDDial will be released publicly to aid the study of ADD dialogue research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08147v1-abstract-full').style.display = 'none'; document.getElementById('2308.08147v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.05127">arXiv:2308.05127</a> <span> [<a href="https://arxiv.org/pdf/2308.05127">pdf</a>, <a href="https://arxiv.org/format/2308.05127">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Data-Free Model Extraction Attacks in the Context of Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shah%2C+H">Harshit Shah</a>, <a href="/search/cs?searchtype=author&query=G%2C+A">Aravindhan G</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+P">Pavan Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Govidarajulu%2C+Y">Yuvaraj Govidarajulu</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2308.05127v1-abstract-short" style="display: inline;"> A significant number of machine learning models are vulnerable to model extraction attacks, which focus on stealing the models by using specially curated queries against the target model. This task is well accomplished by using part of the training data or a surrogate dataset to train a new model that mimics a target model in a white-box environment. In pragmatic situations, however, the target mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05127v1-abstract-full').style.display = 'inline'; document.getElementById('2308.05127v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.05127v1-abstract-full" style="display: none;"> A significant number of machine learning models are vulnerable to model extraction attacks, which focus on stealing the models by using specially curated queries against the target model. This task is well accomplished by using part of the training data or a surrogate dataset to train a new model that mimics a target model in a white-box environment. In pragmatic situations, however, the target models are trained on private datasets that are inaccessible to the adversary. The data-free model extraction technique replaces this problem when it comes to using queries artificially curated by a generator similar to that used in Generative Adversarial Nets. We propose for the first time, to the best of our knowledge, an adversary black box attack extending to a regression problem for predicting bounding box coordinates in object detection. As part of our study, we found that defining a loss function and using a novel generator setup is one of the key aspects in extracting the target model. We find that the proposed model extraction method achieves significant results by using reasonable queries. 
The discovery of this object detection vulnerability will support future prospects for securing such models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05127v1-abstract-full').style.display = 'none'; document.getElementById('2308.05127v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to The 14th International Conference on Computer Vision Systems (ICVS 2023), to be published in Springer, Lecture Notes in Computer Science</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16357">arXiv:2305.16357</a> <span> [<a href="https://arxiv.org/pdf/2305.16357">pdf</a>, <a href="https://arxiv.org/format/2305.16357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> EDM3: Event Detection as Multi-task Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Anantheswaran%2C+U">Ujjwala Anantheswaran</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+H">Himanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Pal%2C+K+K">Kuntal Kumar Pal</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2305.16357v1-abstract-short" style="display: inline;"> Event detection refers to identifying event occurrences in a text and comprises two subtasks: event identification and classification. We present EDM3, a novel approach for Event Detection that formulates three generative tasks: identification, classification, and combined detection. We show that EDM3 helps to learn transferable knowledge that can be leveraged to perform Event Detection and its… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16357v1-abstract-full').style.display = 'inline'; document.getElementById('2305.16357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16357v1-abstract-full" style="display: none;"> Event detection refers to identifying event occurrences in a text and comprises two subtasks: event identification and classification. We present EDM3, a novel approach for Event Detection that formulates three generative tasks: identification, classification, and combined detection. We show that EDM3 helps to learn transferable knowledge that can be leveraged to perform Event Detection and its subtasks concurrently, mitigating the error propagation inherent in pipelined approaches. Unlike previous dataset- or domain-specific approaches, EDM3 utilizes the existing knowledge of language models, allowing it to be trained over any classification schema. We evaluate EDM3 on multiple event detection datasets: RAMS, WikiEvents, MAVEN, and MLEE, showing that EDM3 outperforms 1) single-task performance by 8.4% on average and 2) multi-task performance without instructional prompts by 2.4% on average. We obtain SOTA results on RAMS (71.3% vs. 65.1% F-1) and competitive performance on other datasets. We analyze our approach to demonstrate its efficacy in low-resource and multi-sentence settings.
We also show the effectiveness of this approach on non-standard event configurations such as multi-word and multi-class event triggers. Overall, our results show that EDM3 is a promising approach for Event Detection that has the potential for real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16357v1-abstract-full').style.display = 'none'; document.getElementById('2305.16357v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 4 figures, 10 tables, 5 Page appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.12096">arXiv:2305.12096</a> <span> [<a href="https://arxiv.org/pdf/2305.12096">pdf</a>, <a href="https://arxiv.org/format/2305.12096">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Can NLP Models Correctly Reason Over Contexts that Break the Common Assumptions? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+N">Nisarg Patel</a>, <a href="/search/cs?searchtype=author&query=Handa%2C+D">Divij Handa</a>, <a href="/search/cs?searchtype=author&query=Sarkar%2C+S">Sayantan Sarkar</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+M">Man Luo</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.12096v1-abstract-short" style="display: inline;"> Pre-training on large corpora of text enables the language models to acquire a vast amount of factual and commonsense knowledge which allows them to achieve remarkable performance on a variety of language understanding tasks. They typically acquire this knowledge by learning from the pre-training text and capturing certain patterns from it. However, real-world settings often present scenarios that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12096v1-abstract-full').style.display = 'inline'; document.getElementById('2305.12096v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.12096v1-abstract-full" style="display: none;"> Pre-training on large corpora of text enables the language models to acquire a vast amount of factual and commonsense knowledge which allows them to achieve remarkable performance on a variety of language understanding tasks. They typically acquire this knowledge by learning from the pre-training text and capturing certain patterns from it. However, real-world settings often present scenarios that do not abide by these patterns i.e. 
scenarios that break the common assumptions. Can state-of-the-art NLP models correctly reason over the contexts of such scenarios? Addressing the above question, in this paper, we investigate the ability of models to correctly reason over contexts that break the common assumptions. To this end, we first systematically create evaluation data in which each data instance consists of (a) a common assumption, (b) a context that follows the assumption, (c) a context that breaks the assumption, and (d) questions based on the contexts. Then, through evaluations on multiple models including GPT-3 and Flan T5, we show that while doing fairly well on contexts that follow the common assumptions, the models struggle to correctly reason over contexts that break those assumptions. Specifically, the performance gap is as high as 20% absolute points. Furthermore, we thoroughly analyze these results revealing several interesting findings. We believe our work and findings will encourage and facilitate further research in developing more robust models that can also reliably reason over contexts that break the common assumptions. Data is available at https://github.com/nrjvarshney/break_the_common_assumptions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12096v1-abstract-full').style.display = 'none'; document.getElementById('2305.12096v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.00152">arXiv:2302.00152</a> <span> [<a href="https://arxiv.org/pdf/2302.00152">pdf</a>, <a href="https://arxiv.org/format/2302.00152">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> TwinExplainer: Explaining Predictions of an Automotive Digital Twin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Neupane%2C+S">Subash Neupane</a>, <a href="/search/cs?searchtype=author&query=Fernandez%2C+I+A">Ivan A. 
Fernandez</a>, <a href="/search/cs?searchtype=author&query=Patterson%2C+W">Wilson Patterson</a>, <a href="/search/cs?searchtype=author&query=Mittal%2C+S">Sudip Mittal</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Milan Parmar</a>, <a href="/search/cs?searchtype=author&query=Rahimi%2C+S">Shahram Rahimi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.00152v1-abstract-short" style="display: inline;"> Vehicles are complex Cyber Physical Systems (CPS) that operate in a variety of environments, and the likelihood of failure of one or more subsystems, such as the engine, transmission, brakes, and fuel, can result in unscheduled downtime and incur high maintenance or repair costs. In order to prevent these issues, it is crucial to continuously monitor the health of various subsystems and identify a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00152v1-abstract-full').style.display = 'inline'; document.getElementById('2302.00152v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.00152v1-abstract-full" style="display: none;"> Vehicles are complex Cyber Physical Systems (CPS) that operate in a variety of environments, and the likelihood of failure of one or more subsystems, such as the engine, transmission, brakes, and fuel, can result in unscheduled downtime and incur high maintenance or repair costs. In order to prevent these issues, it is crucial to continuously monitor the health of various subsystems and identify abnormal sensor channel behavior. Data-driven Digital Twin (DT) systems are capable of such a task. Current DT technologies utilize various Deep Learning (DL) techniques that are constrained by the lack of justification or explanation for their predictions. 
This inability of these opaque systems can influence decision-making and raises user trust concerns. This paper presents a solution to this issue, where the TwinExplainer system, with its three-layered architectural pipeline, explains the predictions of an automotive DT. Such a system can assist automotive stakeholders in understanding the global scale of the sensor channels and how they contribute towards generic DT predictions. TwinExplainer can also visualize explanations for both normal and abnormal local predictions computed by the DT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00152v1-abstract-full').style.display = 'none'; document.getElementById('2302.00152v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.11544">arXiv:2301.11544</a> <span> [<a href="https://arxiv.org/pdf/2301.11544">pdf</a>, <a href="https://arxiv.org/format/2301.11544">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Targeted Attacks on Timeseries Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Govindarajulu%2C+Y">Yuvaraj Govindarajulu</a>, <a href="/search/cs?searchtype=author&query=Amballa%2C+A">Avinash Amballa</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+P">Pavan Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.11544v1-abstract-short" style="display: inline;"> Real-world deep learning models developed for Time Series Forecasting are used in several critical applications ranging from medical devices to the security domain. Many previous works have shown how deep learning models are prone to adversarial attacks and studied their vulnerabilities. 
However, the vulnerabilities of time series models for forecasting due to adversarial inputs are not extensivel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.11544v1-abstract-full').style.display = 'inline'; document.getElementById('2301.11544v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.11544v1-abstract-full" style="display: none;"> Real-world deep learning models developed for Time Series Forecasting are used in several critical applications ranging from medical devices to the security domain. Many previous works have shown how deep learning models are prone to adversarial attacks and studied their vulnerabilities. However, the vulnerabilities of time series models for forecasting due to adversarial inputs are not extensively explored. While the attack on a forecasting model might aim to deteriorate the performance of the model, it is more effective, if the attack is focused on a specific impact on the model's output. In this paper, we propose a novel formulation of Directional, Amplitudinal, and Temporal targeted adversarial attacks on time series forecasting models. These targeted attacks create a specific impact on the amplitude and direction of the output prediction. We use the existing adversarial attack techniques from the computer vision domain and adapt them for time series. Additionally, we propose a modified version of the Auto Projected Gradient Descent attack for targeted attacks. We examine the impact of the proposed targeted attacks versus untargeted attacks. We use KS-Tests to statistically demonstrate the impact of the attack. Our experimental results show how targeted attacks on time series models are viable and are more powerful in terms of statistical similarity. It is, hence difficult to detect through statistical methods. 
We believe that this work opens a new paradigm in the time series forecasting domain and represents an important consideration for developing better defenses. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.11544v1-abstract-full').style.display = 'none'; document.getElementById('2301.11544v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.14018">arXiv:2210.14018</a> <span> [<a href="https://arxiv.org/pdf/2210.14018">pdf</a>, <a href="https://arxiv.org/format/2210.14018">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A White-Box Adversarial Attack Against a Digital Twin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Patterson%2C+W">Wilson Patterson</a>, <a href="/search/cs?searchtype=author&query=Fernandez%2C+I">Ivan Fernandez</a>, <a href="/search/cs?searchtype=author&query=Neupane%2C+S">Subash Neupane</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Milan Parmar</a>, <a href="/search/cs?searchtype=author&query=Mittal%2C+S">Sudip Mittal</a>, <a href="/search/cs?searchtype=author&query=Rahimi%2C+S">Shahram Rahimi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.14018v1-abstract-short" style="display: inline;"> Recent research has shown that Machine Learning/Deep Learning (ML/DL) models are particularly vulnerable to adversarial perturbations, which are small changes made to the input data in order to fool a machine learning classifier. The Digital Twin, which is typically described as consisting of a physical entity, a virtual counterpart, and the data connections in between, is increasingly being inves… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.14018v1-abstract-full').style.display = 'inline'; document.getElementById('2210.14018v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.14018v1-abstract-full" style="display: none;"> Recent research has shown that Machine Learning/Deep Learning (ML/DL) models are particularly vulnerable to adversarial perturbations, which are small changes made to the input data in order to fool a machine learning classifier. The Digital Twin, which is typically described as consisting of a physical entity, a virtual counterpart, and the data connections in between, is increasingly being investigated as a means of improving the performance of physical entities by leveraging computational techniques, which are enabled by the virtual counterpart. This paper explores the susceptibility of Digital Twin (DT), a virtual model designed to accurately reflect a physical object using ML/DL classifiers that operate as Cyber Physical Systems (CPS), to adversarial attacks. As a proof of concept, we first formulate a DT of a vehicular system using a deep neural network architecture and then utilize it to launch an adversarial attack. We attack the DT model by perturbing the input to the trained model and show how easily the model can be broken with white-box attacks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.14018v1-abstract-full').style.display = 'none'; document.getElementById('2210.14018v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 38th ACM Annual Computer Security Applications Conference 2023 (ACM ACSAC '23) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.02419">arXiv:2207.02419</a> <span> [<a href="https://arxiv.org/pdf/2207.02419">pdf</a>, <a href="https://arxiv.org/format/2207.02419">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> BioTABQA: Instruction Learning for Biomedical Table Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+M">Man Luo</a>, <a href="/search/cs?searchtype=author&query=Saxena%2C+S">Sharad Saxena</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2207.02419v1-abstract-short" style="display: inline;"> Table Question Answering (TQA) is an important but under-explored task. Most of the existing QA datasets are in unstructured text format and only few of them use tables as the context. To the best of our knowledge, none of TQA datasets exist in the biomedical domain where tables are frequently used to present information. In this paper, we first curate a table question answering dataset, BioTABQA,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.02419v1-abstract-full').style.display = 'inline'; document.getElementById('2207.02419v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.02419v1-abstract-full" style="display: none;"> Table Question Answering (TQA) is an important but under-explored task. Most of the existing QA datasets are in unstructured text format and only few of them use tables as the context. To the best of our knowledge, none of TQA datasets exist in the biomedical domain where tables are frequently used to present information. In this paper, we first curate a table question answering dataset, BioTABQA, using 22 templates and the context from a biomedical textbook on differential diagnosis. BioTABQA can not only be used to teach a model how to answer questions from tables but also evaluate how a model generalizes to unseen questions, an important scenario for biomedical applications. To achieve the generalization evaluation, we divide the templates into 17 training and 5 cross-task evaluations. Then, we develop two baselines using single and multi-tasks learning on BioTABQA. Furthermore, we explore instructional learning, a recent technique showing impressive generalizing performance. 
Experimental results show that our instruction-tuned model outperforms single and multi-task baselines on an average by ~23% and ~6% across various evaluation settings, and more importantly, instruction-tuned model outperforms baselines by ~5% on cross-tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.02419v1-abstract-full').style.display = 'none'; document.getElementById('2207.02419v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BioASQ10 Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12538">arXiv:2205.12538</a> <span> [<a href="https://arxiv.org/pdf/2205.12538">pdf</a>, <a href="https://arxiv.org/format/2205.12538">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Is a Question Decomposition Unit All We Need? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Patel%2C+P">Pruthvi Patel</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12538v2-abstract-short" style="display: inline;"> Large Language Models (LMs) have achieved state-of-the-art performance on many Natural Language Processing (NLP) benchmarks. With the growing number of new benchmarks, we build bigger and more complex LMs. However, building new LMs may not be an ideal option owing to the cost, time and environmental impact associated with it. We explore an alternative route: can we modify data by expressing it in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12538v2-abstract-full').style.display = 'inline'; document.getElementById('2205.12538v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12538v2-abstract-full" style="display: none;"> Large Language Models (LMs) have achieved state-of-the-art performance on many Natural Language Processing (NLP) benchmarks. With the growing number of new benchmarks, we build bigger and more complex LMs. However, building new LMs may not be an ideal option owing to the cost, time and environmental impact associated with it. We explore an alternative route: can we modify data by expressing it in terms of the model's strengths, so that a question becomes easier for models to answer? We investigate if humans can decompose a hard question into a set of simpler questions that are relatively easier for models to solve. 
We analyze a range of datasets involving various forms of reasoning and find that it is indeed possible to significantly improve model performance (24% for GPT3 and 29% for RoBERTa-SQuAD along with a symbolic calculator) via decomposition. Our approach provides a viable option to involve people in NLP research in a meaningful way. Our findings indicate that Human-in-the-loop Question Decomposition (HQD) can potentially provide an alternate path to building large LMs. Code and data are available at https://github.com/Pruthvi98/QuestionDecomposition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12538v2-abstract-full').style.display = 'none'; document.getElementById('2205.12538v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022 (17 pages)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.00415">arXiv:2205.00415</a> <span> [<a href="https://arxiv.org/pdf/2205.00415">pdf</a>, <a href="https://arxiv.org/format/2205.00415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Don't Blame the Annotator: Bias Already Starts in the Annotation Instructions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Geva%2C+M">Mor Geva</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.00415v3-abstract-short" style="display: inline;"> In recent years, progress in NLU has been driven by benchmarks. These benchmarks are typically collected by crowdsourcing, where annotators write examples based on annotation instructions crafted by dataset creators. 
In this work, we hypothesize that annotators pick up on patterns in the crowdsourcing instructions, which bias them to write many similar examples that are then over-represented in th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00415v3-abstract-full').style.display = 'inline'; document.getElementById('2205.00415v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.00415v3-abstract-full" style="display: none;"> In recent years, progress in NLU has been driven by benchmarks. These benchmarks are typically collected by crowdsourcing, where annotators write examples based on annotation instructions crafted by dataset creators. In this work, we hypothesize that annotators pick up on patterns in the crowdsourcing instructions, which bias them to write many similar examples that are then over-represented in the collected data. We study this form of bias, termed instruction bias, in 14 recent NLU benchmarks, showing that instruction examples often exhibit concrete patterns, which are propagated by crowdworkers to the collected data. This extends previous work (Geva et al., 2019) and raises a new concern of whether we are modeling the dataset creator's instructions, rather than the task. Through a series of experiments, we show that, indeed, instruction bias can lead to overestimation of model performance, and that models struggle to generalize beyond biases originating in the crowdsourcing instructions. We further analyze the influence of instruction bias in terms of pattern frequency and model size, and derive concrete recommendations for creating future NLU benchmarks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00415v3-abstract-full').style.display = 'none'; document.getElementById('2205.00415v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EACL 2023 (Outstanding Paper Award)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07705">arXiv:2204.07705</a> <span> [<a href="https://arxiv.org/pdf/2204.07705">pdf</a>, <a href="https://arxiv.org/format/2204.07705">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Super-NaturalInstructions: Generalization via Declarative Instructions on 1600+ NLP Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yizhong Wang</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Alipoormolabashi%2C+P">Pegah Alipoormolabashi</a>, <a href="/search/cs?searchtype=author&query=Kordi%2C+Y">Yeganeh Kordi</a>, <a href="/search/cs?searchtype=author&query=Mirzaei%2C+A">Amirreza Mirzaei</a>, <a href="/search/cs?searchtype=author&query=Arunkumar%2C+A">Anjana Arunkumar</a>, <a 
href="/search/cs?searchtype=author&query=Ashok%2C+A">Arjun Ashok</a>, <a href="/search/cs?searchtype=author&query=Dhanasekaran%2C+A+S">Arut Selvan Dhanasekaran</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Stap%2C+D">David Stap</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+E">Eshaan Pathak</a>, <a href="/search/cs?searchtype=author&query=Karamanolakis%2C+G">Giannis Karamanolakis</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+H+G">Haizhi Gary Lai</a>, <a href="/search/cs?searchtype=author&query=Purohit%2C+I">Ishan Purohit</a>, <a href="/search/cs?searchtype=author&query=Mondal%2C+I">Ishani Mondal</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+J">Jacob Anderson</a>, <a href="/search/cs?searchtype=author&query=Kuznia%2C+K">Kirby Kuznia</a>, <a href="/search/cs?searchtype=author&query=Doshi%2C+K">Krima Doshi</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+M">Maitreya Patel</a>, <a href="/search/cs?searchtype=author&query=Pal%2C+K+K">Kuntal Kumar Pal</a>, <a href="/search/cs?searchtype=author&query=Moradshahi%2C+M">Mehrad Moradshahi</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Purohit%2C+M">Mirali Purohit</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Kaza%2C+P+R">Phani Rohitha Kaza</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07705v3-abstract-short" style="display: inline;"> How well can NLP models generalize to a variety of unseen tasks when provided with task instructions? 
To address this question, we first introduce Super-NaturalInstructions, a benchmark of 1,616 diverse NLP tasks and their expert-written instructions. Our collection covers 76 distinct task types, including but not limited to classification, extraction, infilling, sequence tagging, text rewriting,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07705v3-abstract-full').style.display = 'inline'; document.getElementById('2204.07705v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07705v3-abstract-full" style="display: none;"> How well can NLP models generalize to a variety of unseen tasks when provided with task instructions? To address this question, we first introduce Super-NaturalInstructions, a benchmark of 1,616 diverse NLP tasks and their expert-written instructions. Our collection covers 76 distinct task types, including but not limited to classification, extraction, infilling, sequence tagging, text rewriting, and text composition. This large and diverse collection of tasks enables rigorous benchmarking of cross-task generalization under instructions -- training models to follow instructions on a subset of tasks and evaluating them on the remaining unseen ones. Furthermore, we build Tk-Instruct, a transformer model trained to follow a variety of in-context instructions (plain language task definitions or k-shot examples). Our experiments show that Tk-Instruct outperforms existing instruction-following models such as InstructGPT by over 9% on our benchmark despite being an order of magnitude smaller. We further analyze generalization as a function of various scaling parameters, such as the number of observed tasks, the number of instances per task, and model sizes. We hope our dataset and model facilitate future progress towards more general-purpose NLP models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07705v3-abstract-full').style.display = 'none'; document.getElementById('2204.07705v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2022, 25 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07600">arXiv:2204.07600</a> <span> [<a href="https://arxiv.org/pdf/2204.07600">pdf</a>, <a href="https://arxiv.org/format/2204.07600">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> In-BoXBART: Get Instructions into Biomedical Multi-Task Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Purohit%2C+M">Mirali Purohit</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+M">Man Luo</a>, <a href="/search/cs?searchtype=author&query=Murad%2C+M+H">M. 
Hassan Murad</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07600v1-abstract-short" style="display: inline;"> Single-task models have proven pivotal in solving specific tasks; however, they have limitations in real-world applications where multi-tasking is necessary and domain shifts are exhibited. Recently, instructional prompts have shown significant improvement towards multi-task generalization; however, the effect of instructional prompts and Multi-Task Learning (MTL) has not been systematically studi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07600v1-abstract-full').style.display = 'inline'; document.getElementById('2204.07600v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07600v1-abstract-full" style="display: none;"> Single-task models have proven pivotal in solving specific tasks; however, they have limitations in real-world applications where multi-tasking is necessary and domain shifts are exhibited. Recently, instructional prompts have shown significant improvement towards multi-task generalization; however, the effect of instructional prompts and Multi-Task Learning (MTL) has not been systematically studied in the biomedical domain. Motivated by this, this paper explores the impact of instructional prompts for biomedical MTL. We introduce the BoX, a collection of 32 instruction tasks for Biomedical NLP across (X) various categories. Using this meta-dataset, we propose a unified model termed In-BoXBART, that can jointly learn all tasks of the BoX without any task-specific modules. 
To the best of our knowledge, this is the first attempt to propose a unified model in the biomedical domain and use instructions to achieve generalization across several biomedical tasks. Experimental results indicate that the proposed model: 1) outperforms the single-task baseline by ~3% and multi-task (without instruction) baseline by ~18% on an average, and 2) shows ~23% improvement compared to the single-task baseline in few-shot learning (i.e., 32 instances per task) on an average. Our analysis indicates that there is significant room for improvement across tasks in the BoX, implying the scope for future research direction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07600v1-abstract-full').style.display = 'none'; document.getElementById('2204.07600v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2022 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.02058">arXiv:2204.02058</a> <span> [<a href="https://arxiv.org/pdf/2204.02058">pdf</a>, <a href="https://arxiv.org/format/2204.02058">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> HyperBox: A Supervised Approach for Hypernym Discovery using Box Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Maulik Parmar</a>, <a href="/search/cs?searchtype=author&query=Narayan%2C+A">Apurva Narayan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.02058v2-abstract-short" style="display: inline;"> Hypernymy plays a fundamental role in many AI tasks like taxonomy learning, ontology learning, etc. This has motivated the development of many automatic identification methods for extracting this relation, most of which rely on word distribution. We present a novel model HyperBox to learn box embeddings for hypernym discovery. 
Given an input term, HyperBox retrieves its suitable hypernym from a ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02058v2-abstract-full').style.display = 'inline'; document.getElementById('2204.02058v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.02058v2-abstract-full" style="display: none;"> Hypernymy plays a fundamental role in many AI tasks like taxonomy learning, ontology learning, etc. This has motivated the development of many automatic identification methods for extracting this relation, most of which rely on word distribution. We present a novel model HyperBox to learn box embeddings for hypernym discovery. Given an input term, HyperBox retrieves its suitable hypernym from a target corpus. For this task, we use the dataset published for SemEval 2018 Shared Task on Hypernym Discovery. We compare the performance of our model on two specific domains of knowledge: medical and music. Experimentally, we show that our model outperforms existing methods on the majority of the evaluation metrics. Moreover, our model generalizes well over unseen hypernymy pairs using only a small set of training data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02058v2-abstract-full').style.display = 'none'; document.getElementById('2204.02058v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.09161">arXiv:2203.09161</a> <span> [<a href="https://arxiv.org/pdf/2203.09161">pdf</a>, <a href="https://arxiv.org/format/2203.09161">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> How Many Data Samples is an Additional Instruction Worth? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Puri%2C+R+S">Ravsehaj Singh Puri</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.09161v3-abstract-short" style="display: inline;"> Recently introduced instruction-paradigm empowers non-expert users to leverage NLP resources by defining a new task in natural language. Instruction-tuned models have significantly outperformed multitask learning models (without instruction); however they are far from state-of-the-art task-specific models. 
Conventional approaches to improve model performance via creating datasets with large number… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.09161v3-abstract-full').style.display = 'inline'; document.getElementById('2203.09161v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.09161v3-abstract-full" style="display: none;"> Recently introduced instruction-paradigm empowers non-expert users to leverage NLP resources by defining a new task in natural language. Instruction-tuned models have significantly outperformed multitask learning models (without instruction); however they are far from state-of-the-art task-specific models. Conventional approaches to improve model performance via creating datasets with large number of task instances or architectural changes in the model may not be feasible for non-expert users. However, they can write alternate instructions to represent an instruction task. Is Instruction-augmentation helpful? We augment a subset of tasks in the expanded version of NATURAL INSTRUCTIONS with additional instructions and find that it significantly improves model performance (up to 35%), especially in the low-data regime. Our results indicate that an additional instruction can be equivalent to ~200 data samples on average across tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.09161v3-abstract-full').style.display = 'none'; document.getElementById('2203.09161v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EACL 2023 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.08597">arXiv:2203.08597</a> <span> [<a href="https://arxiv.org/pdf/2203.08597">pdf</a>, <a href="https://arxiv.org/format/2203.08597">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Less is More: Summary of Long Instructions is Better for Program Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kuznia%2C+K">Kirby Kuznia</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.08597v2-abstract-short" style="display: inline;"> Despite the success of large pre-trained language models (LMs) such as Codex, they show below-par performance on the larger and more complicated programming related questions. We show that LMs benefit from the summarized version of complicated questions. 
Our findings show that superfluous information often present in problem description such as human characters, background stories, and names (whic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.08597v2-abstract-full').style.display = 'inline'; document.getElementById('2203.08597v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.08597v2-abstract-full" style="display: none;"> Despite the success of large pre-trained language models (LMs) such as Codex, they show below-par performance on the larger and more complicated programming related questions. We show that LMs benefit from the summarized version of complicated questions. Our findings show that superfluous information often present in problem description such as human characters, background stories, and names (which are included to help humans in understanding a task) does not help models in understanding a task. To this extent, we create a meta-dataset from the frequently used APPS dataset and the newly created CodeContests dataset for the program synthesis task. Our meta-dataset consists of human and synthesized summaries of the long and complicated programming questions. Experimental results on Codex show that our proposed approach outperforms baseline by 8.13% on the APPS dataset and 11.88% on the CodeContests dataset on average in terms of strict accuracy. Our analysis shows that summaries significantly improve performance for introductory (9.86%) and interview (11.48%) programming questions. However, it shows improvement by a small margin (~ 2%) for competitive programming questions, implying scope for future research in this direction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.08597v2-abstract-full').style.display = 'none'; document.getElementById('2203.08597v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.09039">arXiv:2202.09039</a> <span> [<a href="https://arxiv.org/pdf/2202.09039">pdf</a>, <a href="https://arxiv.org/format/2202.09039">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Critical Checkpoints for Evaluating Defence Models Against Adversarial Attack and Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tekwani%2C+K">Kanak Tekwani</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2202.09039v1-abstract-short" style="display: inline;"> From past couple of years there is a cycle of researchers proposing a defence model for adversaries in machine learning which is arguably defensible to most of the existing attacks in restricted condition (they evaluate on some bounded inputs or datasets). And then shortly another set of researcher finding the vulnerabilities in that defence model and breaking it by proposing a stronger attack mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09039v1-abstract-full').style.display = 'inline'; document.getElementById('2202.09039v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.09039v1-abstract-full" style="display: none;"> From past couple of years there is a cycle of researchers proposing a defence model for adversaries in machine learning which is arguably defensible to most of the existing attacks in restricted condition (they evaluate on some bounded inputs or datasets). And then shortly another set of researcher finding the vulnerabilities in that defence model and breaking it by proposing a stronger attack model. Some common flaws have been noticed in the past defence models that were broken in very short time. Defence models being broken so easily is a point of concern as decision of many crucial activities are taken with the help of machine learning models. So there is an utter need of some defence checkpoints that any researcher should keep in mind while evaluating the soundness of technique and declaring it to be decent defence technique. In this paper, we have suggested few checkpoints that should be taken into consideration while building and evaluating the soundness of defence models. 
All these points are recommended after observing why some past defence models failed and how some model remained adamant and proved their soundness against some of the very strong attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09039v1-abstract-full').style.display = 'none'; document.getElementById('2202.09039v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.02910">arXiv:2112.02910</a> <span> [<a href="https://arxiv.org/pdf/2112.02910">pdf</a>, <a href="https://arxiv.org/format/2112.02910">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Tale of Color Variants: Representation and Self-Supervised Learning in Fashion E-Commerce </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dutta%2C+U+K">Ujjal Kr Dutta</a>, <a href="/search/cs?searchtype=author&query=Repakula%2C+S">Sandeep Repakula</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Maulik Parmar</a>, <a href="/search/cs?searchtype=author&query=Ravi%2C+A">Abhinav 
Ravi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.02910v1-abstract-short" style="display: inline;"> In this paper, we address a crucial problem in fashion e-commerce (with respect to customer experience, as well as revenue): color variants identification, i.e., identifying fashion products that match exactly in their design (or style), but only to differ in their color. We propose a generic framework, that leverages deep visual Representation Learning at its heart, to address this problem for ou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.02910v1-abstract-full').style.display = 'inline'; document.getElementById('2112.02910v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.02910v1-abstract-full" style="display: none;"> In this paper, we address a crucial problem in fashion e-commerce (with respect to customer experience, as well as revenue): color variants identification, i.e., identifying fashion products that match exactly in their design (or style), but only to differ in their color. We propose a generic framework, that leverages deep visual Representation Learning at its heart, to address this problem for our fashion e-commerce platform. Our framework could be trained with supervisory signals in the form of triplets, that are obtained manually. However, it is infeasible to obtain manual annotations for the entire huge collection of data usually present in fashion e-commerce platforms, such as ours, while capturing all the difficult corner cases. 
But, to our rescue, interestingly we observed that this crucial problem in fashion e-commerce could also be solved by simple color jitter based image augmentation, that recently became widely popular in the contrastive Self-Supervised Learning (SSL) literature, that seeks to learn visual representations without using manual labels. This naturally led to a question in our mind: Could we leverage SSL in our use-case, and still obtain comparable performance to our supervised framework? The answer is, Yes! because, color variant fashion objects are nothing but manifestations of a style, in different colors, and a model trained to be invariant to the color (with, or without supervision), should be able to recognize this! This is what the paper further demonstrates, both qualitatively, and quantitatively, while evaluating a couple of state-of-the-art SSL techniques, and also proposing a novel method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.02910v1-abstract-full').style.display = 'none'; document.getElementById('2112.02910v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Annual Conference on Innovative Applications of Artificial Intelligence (IAAI)/ AAAI Conference on Artificial Intelligence (AAAI) 2022. 
arXiv admin note: substantial text overlap with arXiv:2104.08581</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.04865">arXiv:2109.04865</a> <span> [<a href="https://arxiv.org/pdf/2109.04865">pdf</a>, <a href="https://arxiv.org/format/2109.04865">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Emerging AI Security Threats for Autonomous Cars -- Case Studies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lekkala%2C+S">Shanthi Lekkala</a>, <a href="/search/cs?searchtype=author&query=Motwani%2C+T">Tanya Motwani</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a>, <a href="/search/cs?searchtype=author&query=Phadke%2C+A">Amit Phadke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.04865v1-abstract-short" style="display: inline;"> Artificial Intelligence has made a significant contribution to autonomous vehicles, from object detection to path planning. However, AI models require a large amount of sensitive training data and are usually computationally intensive to build. The commercial value of such models motivates attackers to mount various attacks. 
Adversaries can launch model extraction attacks for monetization purposes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.04865v1-abstract-full').style.display = 'inline'; document.getElementById('2109.04865v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.04865v1-abstract-full" style="display: none;"> Artificial Intelligence has made a significant contribution to autonomous vehicles, from object detection to path planning. However, AI models require a large amount of sensitive training data and are usually computationally intensive to build. The commercial value of such models motivates attackers to mount various attacks. Adversaries can launch model extraction attacks for monetization purposes or as a stepping-stone towards other attacks like model evasion. In specific cases, it even results in destroying brand reputation, differentiation, and value proposition. In addition, IP laws and AI-related legalities are still evolving and are not uniform across countries. We discuss model extraction attacks in detail with two use-cases and a generic kill-chain that can compromise autonomous cars. It is essential to investigate strategies to manage and mitigate the risk of model theft. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.04865v1-abstract-full').style.display = 'none'; document.getElementById('2109.04865v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures; Manuscript is accepted at ESCAR Europe 2021 conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.08581">arXiv:2104.08581</a> <span> [<a href="https://arxiv.org/pdf/2104.08581">pdf</a>, <a href="https://arxiv.org/format/2104.08581">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Color Variants Identification in Fashion e-commerce via Contrastive Self-Supervised Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dutta%2C+U+K">Ujjal Kr Dutta</a>, <a href="/search/cs?searchtype=author&query=Repakula%2C+S">Sandeep Repakula</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Maulik Parmar</a>, <a href="/search/cs?searchtype=author&query=Ravi%2C+A">Abhinav Ravi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.08581v2-abstract-short" style="display: inline;"> In this paper, we utilize deep visual Representation Learning to address an important problem in fashion e-commerce: color variants identification, i.e., identifying fashion products that match exactly in their design (or style), but only to differ in their color. 
At first we attempt to tackle the problem by obtaining manual annotations (depicting whether two products are color variants), and trai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08581v2-abstract-full').style.display = 'inline'; document.getElementById('2104.08581v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.08581v2-abstract-full" style="display: none;"> In this paper, we utilize deep visual Representation Learning to address an important problem in fashion e-commerce: color variants identification, i.e., identifying fashion products that match exactly in their design (or style), but only to differ in their color. At first we attempt to tackle the problem by obtaining manual annotations (depicting whether two products are color variants), and train a supervised triplet loss based neural network model to learn representations of fashion products. However, for large scale real-world industrial datasets such as addressed in our paper, it is infeasible to obtain annotations for the entire dataset, while capturing all the difficult corner cases. Interestingly, we observed that color variants are essentially manifestations of color jitter based augmentations. Thus, we instead explore Self-Supervised Learning (SSL) to solve this problem. We observed that existing state-of-the-art SSL methods perform poorly for our problem. To address this, we propose a novel SSL based color variants model that simultaneously focuses on different parts of an apparel. Quantitative and qualitative evaluation shows that our method outperforms existing SSL methods, and at times, the supervised model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08581v2-abstract-full').style.display = 'none'; document.getElementById('2104.08581v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted In IJCAI-21 Weakly Supervised Representation Learning (WSRL) workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.15406">arXiv:2103.15406</a> <span> [<a href="https://arxiv.org/pdf/2103.15406">pdf</a>, <a href="https://arxiv.org/ps/2103.15406">ps</a>, <a href="https://arxiv.org/format/2103.15406">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Fundamental Challenges in Deep Learning for Stiff Contact Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Halm%2C+M">Mathew Halm</a>, <a href="/search/cs?searchtype=author&query=Posa%2C+M">Michael Posa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.15406v1-abstract-short" style="display: inline;"> Frictional contact has been extensively studied as the core underlying behavior of legged locomotion and 
manipulation, and its nearly-discontinuous nature makes planning and control difficult even when an accurate model of the robot is available. Here, we present empirical evidence that learning an accurate model in the first place can be confounded by contact, as modern deep learning approaches a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15406v1-abstract-full').style.display = 'inline'; document.getElementById('2103.15406v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.15406v1-abstract-full" style="display: none;"> Frictional contact has been extensively studied as the core underlying behavior of legged locomotion and manipulation, and its nearly-discontinuous nature makes planning and control difficult even when an accurate model of the robot is available. Here, we present empirical evidence that learning an accurate model in the first place can be confounded by contact, as modern deep learning approaches are not designed to capture this non-smoothness. We isolate the effects of contact's non-smoothness by varying the mechanical stiffness of a compliant contact simulator. Even for a simple system, we find that stiffness alone dramatically degrades training processes, generalization, and data-efficiency. Our results raise serious questions about simulated testing environments which do not accurately reflect the stiffness of rigid robotic hardware. Significant additional investigation will be necessary to fully understand and mitigate these effects, and we suggest several avenues for future study. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15406v1-abstract-full').style.display = 'none'; document.getElementById('2103.15406v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.02017">arXiv:2101.02017</a> <span> [<a href="https://arxiv.org/pdf/2101.02017">pdf</a>, <a href="https://arxiv.org/ps/2101.02017">ps</a>, <a href="https://arxiv.org/format/2101.02017">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> COVID-19: Comparative Analysis of Methods for Identifying Articles Related to Therapeutics and Vaccines without Using Labeled Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Ambalavanan%2C+A+K">Ashwin Karthik Ambalavanan</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+H">Hong Guan</a>, <a href="/search/cs?searchtype=author&query=Banerjee%2C+R">Rishab Banerjee</a>, <a href="/search/cs?searchtype=author&query=Pabla%2C+J">Jitesh Pabla</a>, <a href="/search/cs?searchtype=author&query=Devarakonda%2C+M">Murthy Devarakonda</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.02017v1-abstract-short" style="display: inline;"> Here we 
proposed an approach to analyze text classification methods based on the presence or absence of task-specific terms (and their synonyms) in the text. We applied this approach to study six different transfer-learning and unsupervised methods for screening articles relevant to COVID-19 vaccines and therapeutics. The analysis revealed that while a BERT model trained on search-engine results g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.02017v1-abstract-full').style.display = 'inline'; document.getElementById('2101.02017v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.02017v1-abstract-full" style="display: none;"> Here we proposed an approach to analyze text classification methods based on the presence or absence of task-specific terms (and their synonyms) in the text. We applied this approach to study six different transfer-learning and unsupervised methods for screening articles relevant to COVID-19 vaccines and therapeutics. The analysis revealed that while a BERT model trained on search-engine results generally performed well, it misclassified relevant abstracts that did not contain task-specific terms. We used this insight to create a more effective unsupervised ensemble. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.02017v1-abstract-full').style.display = 'none'; document.getElementById('2101.02017v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 Tables, Appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.03795">arXiv:2010.03795</a> <span> [<a href="https://arxiv.org/pdf/2010.03795">pdf</a>, <a href="https://arxiv.org/format/2010.03795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Mapping of Real World Problems to Nature Inspired Algorithm using Goal based Classification and TRIZ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sukharamwala%2C+P">Palak Sukharamwala</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.03795v1-abstract-short" style="display: inline;"> The technologies and algorithms are growing at an exponential rate. The technologies are capable enough to solve technically challenging and complex problems which seemed impossible task. However, the trending methods and approaches are facing multiple challenges on various fronts of data, algorithms, software, computational complexities, and energy efficiencies. 
Nature also faces similar challeng… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.03795v1-abstract-full').style.display = 'inline'; document.getElementById('2010.03795v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.03795v1-abstract-full" style="display: none;"> The technologies and algorithms are growing at an exponential rate. The technologies are capable enough to solve technically challenging and complex problems which seemed impossible task. However, the trending methods and approaches are facing multiple challenges on various fronts of data, algorithms, software, computational complexities, and energy efficiencies. Nature also faces similar challenges. Nature has solved those challenges and formulation of those are available as Nature Inspired Algorithms (NIA), which are derived based on the study of nature. A novel method based on TRIZ to map the real-world problems to nature problems is explained here. TRIZ is a Theory of inventive problem solving. Using the proposed framework, best NIA can be identified to solve the real-world problems. For this framework to work, a novel classification of NIA based on the end goal that nature is trying to achieve is devised. The application of this framework along with examples is also discussed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.03795v1-abstract-full').style.display = 'none'; document.getElementById('2010.03795v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 9 figures, 3 figures; Under review for publication as book chapter</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.11638">arXiv:2008.11638</a> <span> [<a href="https://arxiv.org/pdf/2008.11638">pdf</a>, <a href="https://arxiv.org/format/2008.11638">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Buy Me That Look: An Approach for Recommending Similar Fashion Products </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ravi%2C+A">Abhinav Ravi</a>, <a href="/search/cs?searchtype=author&query=Repakula%2C+S">Sandeep Repakula</a>, <a href="/search/cs?searchtype=author&query=Dutta%2C+U+K">Ujjal Kr Dutta</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Maulik Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.11638v2-abstract-short" style="display: inline;"> Have you ever looked at an Instagram model, or a model in a fashion e-commerce web-page, and thought \textit{"Wish I could get a list of fashion items similar to the ones worn by the model!"}. This is what we address in this paper, where we propose a novel computer vision based technique called \textbf{ShopLook} to address the challenging problem of recommending similar fashion products. 
The propo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.11638v2-abstract-full').style.display = 'inline'; document.getElementById('2008.11638v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.11638v2-abstract-full" style="display: none;"> Have you ever looked at an Instagram model, or a model in a fashion e-commerce web-page, and thought \textit{"Wish I could get a list of fashion items similar to the ones worn by the model!"}. This is what we address in this paper, where we propose a novel computer vision based technique called \textbf{ShopLook} to address the challenging problem of recommending similar fashion products. The proposed method has been evaluated at Myntra (www.myntra.com), a leading online fashion e-commerce platform. In particular, given a user query and the corresponding Product Display Page (PDP) against the query, the goal of our method is to recommend similar fashion products corresponding to the entire set of fashion articles worn by a model in the PDP full-shot image (the one showing the entire model from head to toe). The novelty and strength of our method lies in its capability to recommend similar articles for all the fashion items worn by the model, in addition to the primary article corresponding to the query. This is not only important to promote cross-sells for boosting revenue, but also for improving customer experience and engagement. In addition, our approach is also capable of recommending similar products for User Generated Content (UGC), e.g., fashion article images uploaded by users. Formally, our proposed method consists of the following components (in the same order): i) Human keypoint detection, ii) Pose classification, iii) Article localisation and object detection, along with active learning feedback, and iv) Triplet network based image embedding model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.11638v2-abstract-full').style.display = 'none'; document.getElementById('2008.11638v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the IEEE International Conference on Multimedia Information Processing and Retrieval (MIPR) 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.09022">arXiv:2006.09022</a> <span> [<a href="https://arxiv.org/pdf/2006.09022">pdf</a>, <a href="https://arxiv.org/format/2006.09022">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> NodeNet: A Graph Regularised Neural Network for Node Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dabhi%2C+S">Shrey Dabhi</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.09022v1-abstract-short" 
style="display: inline;"> Real-world events exhibit a high degree of interdependence and connections, and hence data points generated also inherit the linkages. However, the majority of AI/ML techniques leave out the linkages among data points. The recent surge of interest in graph-based AI/ML techniques is aimed to leverage the linkages. Graph-based learning algorithms utilize the data and related information effectively… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.09022v1-abstract-full').style.display = 'inline'; document.getElementById('2006.09022v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.09022v1-abstract-full" style="display: none;"> Real-world events exhibit a high degree of interdependence and connections, and hence data points generated also inherit the linkages. However, the majority of AI/ML techniques leave out the linkages among data points. The recent surge of interest in graph-based AI/ML techniques is aimed to leverage the linkages. Graph-based learning algorithms utilize the data and related information effectively to build superior models. Neural Graph Learning (NGL) is one such technique that utilizes a traditional machine learning algorithm with a modified loss function to leverage the edges in the graph structure. In this paper, we propose a model using NGL - NodeNet, to solve node classification task for citation graphs. We discuss our modifications and their relevance to the task. We further compare our results with the current state of the art and investigate reasons for the superior performance of NodeNet. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.09022v1-abstract-full').style.display = 'none'; document.getElementById('2006.09022v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.11021">arXiv:2004.11021</a> <span> [<a href="https://arxiv.org/pdf/2004.11021">pdf</a>, <a href="https://arxiv.org/format/2004.11021">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Virtual SAR: A Synthetic Dataset for Deep Learning based Speckle Noise Reduction Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dabhi%2C+S">Shrey Dabhi</a>, <a href="/search/cs?searchtype=author&query=Soni%2C+K">Kartavya Soni</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+U">Utkarsh Patel</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+P">Priyanka Sharma</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.11021v1-abstract-short" style="display: 
inline;"> Synthetic Aperture Radar (SAR) images contain a huge amount of information, however, the number of practical use-cases is limited due to the presence of speckle noise in them. In recent years, deep learning based techniques have brought significant improvement in the domain of denoising and image restoration. However, further research has been hampered by the lack of availability of data suitable… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11021v1-abstract-full').style.display = 'inline'; document.getElementById('2004.11021v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.11021v1-abstract-full" style="display: none;"> Synthetic Aperture Radar (SAR) images contain a huge amount of information, however, the number of practical use-cases is limited due to the presence of speckle noise in them. In recent years, deep learning based techniques have brought significant improvement in the domain of denoising and image restoration. However, further research has been hampered by the lack of availability of data suitable for training deep neural network based systems. With this paper, we propose a standard way of generating synthetic data for the training of speckle reduction algorithms and demonstrate a use-case to advance research in this domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11021v1-abstract-full').style.display = 'none'; document.getElementById('2004.11021v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.08641">arXiv:2002.08641</a> <span> [<a href="https://arxiv.org/pdf/2002.08641">pdf</a>, <a href="https://arxiv.org/format/2002.08641">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Novel Framework for Selection of GANs for an Application </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Motwani%2C+T">Tanya Motwani</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.08641v2-abstract-short" style="display: inline;"> Generative Adversarial Network (GAN) is a current focal point of research. The body of knowledge is fragmented, leading to a trial-error method while selecting an appropriate GAN for a given scenario. We provide a comprehensive summary of the evolution of GANs starting from its inception addressing issues like mode collapse, vanishing gradient, unstable training and non-convergence. 
We also provid… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.08641v2-abstract-full').style.display = 'inline'; document.getElementById('2002.08641v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.08641v2-abstract-full" style="display: none;"> Generative Adversarial Network (GAN) is a current focal point of research. The body of knowledge is fragmented, leading to a trial-error method while selecting an appropriate GAN for a given scenario. We provide a comprehensive summary of the evolution of GANs starting from its inception addressing issues like mode collapse, vanishing gradient, unstable training and non-convergence. We also provide a comparison of various GANs from the application point of view, its behaviour and implementation details. We propose a novel framework to identify candidate GANs for a specific use case based on architecture, loss, regularization and divergence. We also discuss application of the framework using an example, and we demonstrate a significant reduction in search space. This efficient way to determine potential GANs lowers unit economics of AI development for organizations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.08641v2-abstract-full').style.display = 'none'; document.getElementById('2002.08641v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 1 figure, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.05547">arXiv:2002.05547</a> <span> [<a href="https://arxiv.org/pdf/2002.05547">pdf</a>, <a href="https://arxiv.org/format/2002.05547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-59638-5_13">10.1007/978-3-030-59638-5_13 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dynamic Role-Based Access Control for Decentralized Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chatterjee%2C+A">Arnab Chatterjee</a>, <a href="/search/cs?searchtype=author&query=Pitroda%2C+Y">Yash Pitroda</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.05547v2-abstract-short" style="display: inline;"> Access control management is an integral part of maintaining the security of an application. 
Although there has been significant work in the field of cloud access control mechanisms, however, with the advent of Distributed Ledger Technology (DLT), on-chain access control management frameworks hardly exist. Existing access control management mechanisms are tightly coupled with the business logic, r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.05547v2-abstract-full').style.display = 'inline'; document.getElementById('2002.05547v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.05547v2-abstract-full" style="display: none;"> Access control management is an integral part of maintaining the security of an application. Although there has been significant work in the field of cloud access control mechanisms, however, with the advent of Distributed Ledger Technology (DLT), on-chain access control management frameworks hardly exist. Existing access control management mechanisms are tightly coupled with the business logic, resulting in governance issues, non-coherent with existing Identity Management Solutions, low security, and compromised usability. We propose a novel framework to implement dynamic role-based access control for decentralized applications (dApps). The framework allows for managing access control on a dApp, which is completely decoupled from the business application and integrates seamlessly with any dApps. The smart contract architecture allows for the independent management of business logic and execution of access control policies. It also facilitates secure, low cost, and a high degree of flexibility of access control management. The proposed framework promotes decentralized governance of access control policies and efficient smart contract upgrades. We also provide quantitative and qualitative metrics for the efficacy and efficiency of the framework. 
Any Turing complete smart contract programming language is an excellent fit to implement the framework. We expect this framework to benefit enterprise and non-enterprise dApps and provide greater access control flexibility and effective integration with traditional and state of the art identity management solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.05547v2-abstract-full').style.display = 'none'; document.getElementById('2002.05547v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, 1 table</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Blockchain -- ICBC 2020. Lecture Notes in Computer Science, vol 12404. 
Springer, Cham </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.04989">arXiv:2002.04989</a> <span> [<a href="https://arxiv.org/pdf/2002.04989">pdf</a>, <a href="https://arxiv.org/format/2002.04989">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Eigenvector Component Calculation Speedup over NumPy for High-Performance Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dabhi%2C+S">Shrey Dabhi</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Manojkumar Parmar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.04989v4-abstract-short" style="display: inline;"> Applications related to artificial intelligence, machine learning, and system identification simulations essentially use eigenvectors. Calculating eigenvectors for very large matrices using conventional methods is compute-intensive and renders the applications slow. Recently, Eigenvector-Eigenvalue Identity formula promising significant speedup was identified. 
We study the algorithmic implementati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.04989v4-abstract-full').style.display = 'inline'; document.getElementById('2002.04989v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.04989v4-abstract-full" style="display: none;"> Applications related to artificial intelligence, machine learning, and system identification simulations essentially use eigenvectors. Calculating eigenvectors for very large matrices using conventional methods is compute-intensive and renders the applications slow. Recently, Eigenvector-Eigenvalue Identity formula promising significant speedup was identified. We study the algorithmic implementation of the formula against the existing state-of-the-art algorithms and their implementations to evaluate the performance gains. We provide a first of its kind systematic study of the implementation of the formula. We demonstrate further improvements using high-performance computing concepts over native NumPy eigenvector implementation which uses LAPACK and BLAS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.04989v4-abstract-full').style.display = 'none'; document.getElementById('2002.04989v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at 8th International Conference on Recent Trends in Computing (ICRTC 2020), to be published in Springer Lecture Notes in Networks and Systems (LNNS)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.07351">arXiv:1910.07351</a> <span> [<a href="https://arxiv.org/pdf/1910.07351">pdf</a>, <a href="https://arxiv.org/format/1910.07351">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-45442-5_61">10.1007/978-3-030-45442-5_61 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> NLPExplorer: Exploring the Universe of NLP Papers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Monarch Parmar</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+N">Naman Jain</a>, <a href="/search/cs?searchtype=author&query=Jain%2C+P">Pranjali Jain</a>, <a href="/search/cs?searchtype=author&query=Sahit%2C+P+J">P Jayakrishna Sahit</a>, <a href="/search/cs?searchtype=author&query=Pachpande%2C+S">Soham Pachpande</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+S">Shruti Singh</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+M">Mayank Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.07351v2-abstract-short" style="display: inline;"> Understanding the current research trends, problems, and their innovative solutions remains a bottleneck due to the ever-increasing volume of scientific articles. In this paper, we propose NLPExplorer, a completely automatic portal for indexing, searching, and visualizing Natural Language Processing (NLP) research volume. NLPExplorer presents interesting insights from papers, authors, venues, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.07351v2-abstract-full').style.display = 'inline'; document.getElementById('1910.07351v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.07351v2-abstract-full" style="display: none;"> Understanding the current research trends, problems, and their innovative solutions remains a bottleneck due to the ever-increasing volume of scientific articles. In this paper, we propose NLPExplorer, a completely automatic portal for indexing, searching, and visualizing Natural Language Processing (NLP) research volume. NLPExplorer presents interesting insights from papers, authors, venues, and topics. In contrast to previous topic modelling based approaches, we manually curate five coarse-grained non-exclusive topical categories namely Linguistic Target (Syntax, Discourse, etc.), Tasks (Tagging, Summarization, etc.), Approaches (unsupervised, supervised, etc.), Languages (English, Chinese, etc.) and Dataset types (news, clinical notes, etc.). Some of the novel features include a list of young popular authors, popular URLs, and datasets, a list of topically diverse papers and recent popular papers. Also, it provides temporal statistics such as yearwise popularity of topics, datasets, and seminal papers. 
To facilitate future research and system development, we make all the processed datasets accessible through API calls. The current system is available at http://lingo.iitgn.ac.in:5001/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.07351v2-abstract-full').style.display = 'none'; document.getElementById('1910.07351v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42nd European Conference on Information Retrieval Research, ECIR 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.01996">arXiv:1905.01996</a> <span> [<a href="https://arxiv.org/pdf/1905.01996">pdf</a>, <a href="https://arxiv.org/format/1905.01996">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-05918-7_27">10.1007/978-3-030-05918-7_27 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Neural Machine Translation with Recurrent Highway Networks </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Maulik Parmar</a>, <a href="/search/cs?searchtype=author&query=Devi%2C+V+S">V. Susheela Devi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.01996v1-abstract-short" style="display: inline;"> Recurrent Neural Networks have lately gained a lot of popularity in language modelling tasks, especially in neural machine translation (NMT). Very recent NMT models are based on Encoder-Decoder, where a deep LSTM based encoder is used to project the source sentence to a fixed dimensional vector and then another deep LSTM decodes the target sentence from the vector. However there has been very littl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.01996v1-abstract-full').style.display = 'inline'; document.getElementById('1905.01996v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.01996v1-abstract-full" style="display: none;"> Recurrent Neural Networks have lately gained a lot of popularity in language modelling tasks, especially in neural machine translation (NMT). Very recent NMT models are based on Encoder-Decoder, where a deep LSTM based encoder is used to project the source sentence to a fixed dimensional vector and then another deep LSTM decodes the target sentence from the vector. However there has been very little work on exploring architectures that have more than one layer in space (i.e. in each time step). This paper examines the effectiveness of the simple Recurrent Highway Networks (RHN) in NMT tasks. The model uses Recurrent Highway Neural Network in encoder and decoder, with attention. We also explore the reconstructor model to improve adequacy. 
We demonstrate the effectiveness of all three approaches on the IWSLT English-Vietnamese dataset. We see that RHN performs on par with LSTM based models and even better in some cases. We see that deep RHN models are easy to train compared to deep LSTM based models because of highway connections. The paper also investigates the effects of increasing recurrent depth in each time step. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.01996v1-abstract-full').style.display = 'none'; document.getElementById('1905.01996v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">International Conference on Mining Intelligence and Knowledge Exploration</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In: Groza A., Prasath R. (eds) Mining Intelligence and Knowledge Exploration. MIKE 2018. Lecture Notes in Computer Science, vol 11308. 
Springer, Cham </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 
0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository