Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 56 results for author: <span class="mathjax">Talukdar, P</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Talukdar, P"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Talukdar%2C+P&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Talukdar, P"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16816">arXiv:2404.16816</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.16816">pdf</a>, <a href="https://arxiv.org/format/2404.16816">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> IndicGenBench: A Multilingual Benchmark to Evaluate Generation Capabilities of LLMs on Indic Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+H">Harman Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+N">Nitish Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Bharadwaj%2C+S">Shikhar Bharadwaj</a>, <a href="/search/cs?searchtype=author&amp;query=Tewari%2C+D">Dinesh Tewari</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16816v2-abstract-short" style="display: inline;"> As large language models (LLMs) see increasing adoption across the globe, it is imperative for LLMs to be representative of the linguistic diversity of the world. India is a linguistically diverse country of 1.4 Billion people. To facilitate research on multilingual LLM evaluation, we release IndicGenBench - the largest benchmark for evaluating LLMs on user-facing generation tasks across a diverse&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16816v2-abstract-full').style.display = 'inline'; document.getElementById('2404.16816v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16816v2-abstract-full" style="display: none;"> As large language models (LLMs) see increasing adoption across the globe, it is imperative for LLMs to be representative of the linguistic diversity of the world. 
India is a linguistically diverse country of 1.4 Billion people. To facilitate research on multilingual LLM evaluation, we release IndicGenBench - the largest benchmark for evaluating LLMs on user-facing generation tasks across a diverse set 29 of Indic languages covering 13 scripts and 4 language families. IndicGenBench is composed of diverse generation tasks like cross-lingual summarization, machine translation, and cross-lingual question answering. IndicGenBench extends existing benchmarks to many Indic languages through human curation providing multi-way parallel evaluation data for many under-represented Indic languages for the first time. We evaluate a wide range of proprietary and open-source LLMs including GPT-3.5, GPT-4, PaLM-2, mT5, Gemma, BLOOM and LLaMA on IndicGenBench in a variety of settings. The largest PaLM-2 models performs the best on most tasks, however, there is a significant performance gap in all languages compared to English showing that further research is needed for the development of more inclusive multilingual language models. IndicGenBench is released at www.github.com/google-research-datasets/indic-gen-bench <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16816v2-abstract-full').style.display = 'none'; document.getElementById('2404.16816v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.02412">arXiv:2401.02412</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.02412">pdf</a>, <a href="https://arxiv.org/format/2401.02412">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LLM Augmented LLMs: Expanding Capabilities through Composition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bansal%2C+R">Rachit Bansal</a>, <a href="/search/cs?searchtype=author&amp;query=Samanta%2C+B">Bidisha Samanta</a>, <a href="/search/cs?searchtype=author&amp;query=Dalmia%2C+S">Siddharth Dalmia</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+N">Nitish Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Ganapathy%2C+S">Sriram Ganapathy</a>, <a href="/search/cs?searchtype=author&amp;query=Bapna%2C+A">Abhishek Bapna</a>, <a href="/search/cs?searchtype=author&amp;query=Jain%2C+P">Prateek Jain</a>, <a 
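Benchmarks of this kind are typically consumed as a loop over per-task, per-language files, scoring model generations against references. The sketch below illustrates that pattern only; the JSONL field names and the `generate` stub are hypothetical and do not reflect the actual IndicGenBench release format.

```python
import json

def token_f1(prediction: str, reference: str) -> float:
    """Whitespace-token F1, a common lexical metric for generation tasks."""
    pred, ref = prediction.split(), reference.split()
    common = sum(min(pred.count(t), ref.count(t)) for t in set(pred))
    if not common:
        return 0.0
    precision, recall = common / len(pred), common / len(ref)
    return 2 * precision * recall / (precision + recall)

def generate(prompt: str) -> str:
    return prompt  # placeholder: replace with a real LLM call

def evaluate(path: str) -> float:
    """Average token F1 over one task/language file of {"input", "target"} rows."""
    scores = []
    with open(path) as f:
        for line in f:
            ex = json.loads(line)
            scores.append(token_f1(generate(ex["input"]), ex["target"]))
    return sum(scores) / len(scores)
```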
href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.02412v1-abstract-short" style="display: inline;"> Foundational models with billions of parameters which have been trained on large corpora of data have demonstrated non-trivial skills in a variety of domains. However, due to their monolithic structure, it is challenging and expensive to augment them or impart new skills. On the other hand, due to their adaptation abilities, several new instances of these models are being trained towards new domai&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02412v1-abstract-full').style.display = 'inline'; document.getElementById('2401.02412v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.02412v1-abstract-full" style="display: none;"> Foundational models with billions of parameters which have been trained on large corpora of data have demonstrated non-trivial skills in a variety of domains. However, due to their monolithic structure, it is challenging and expensive to augment them or impart new skills. On the other hand, due to their adaptation abilities, several new instances of these models are being trained towards new domains and tasks. In this work, we study the problem of efficient and practical composition of existing foundation models with more specific models to enable newer capabilities. To this end, we propose CALM -- Composition to Augment Language Models -- which introduces cross-attention between models to compose their representations and enable new capabilities. Salient features of CALM are: (i) Scales up LLMs on new tasks by &#39;re-using&#39; existing LLMs along with a few additional parameters and data, (ii) Existing model weights are kept intact, and hence preserves existing capabilities, and (iii) Applies to diverse domains and settings. We illustrate that augmenting PaLM2-S with a smaller model trained on low-resource languages results in an absolute improvement of up to 13\% on tasks like translation into English and arithmetic reasoning for low-resource languages. Similarly, when PaLM2-S is augmented with a code-specific model, we see a relative improvement of 40\% over the base model for code generation and explanation tasks -- on-par with fully fine-tuned counterparts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02412v1-abstract-full').style.display = 'none'; document.getElementById('2401.02412v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 2 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00913">arXiv:2311.00913</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.00913">pdf</a>, <a href="https://arxiv.org/format/2311.00913">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Self-Influence Guided Data Reweighting for Language Model Pre-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakkar%2C+M">Megh Thakkar</a>, <a href="/search/cs?searchtype=author&amp;query=Bolukbasi%2C+T">Tolga Bolukbasi</a>, <a href="/search/cs?searchtype=author&amp;query=Ganapathy%2C+S">Sriram Ganapathy</a>, <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Chandar%2C+S">Sarath Chandar</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00913v1-abstract-short" style="display: inline;"> Language Models (LMs) pre-trained with self-supervision on large text corpora have become the default starting point for developing models for various NLP tasks. Once the pre-training corpus has been assembled, all data samples in the corpus are treated with equal importance during LM pre-training. However, due to varying levels of relevance and quality of data, equal importance to all the data sa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00913v1-abstract-full').style.display = 'inline'; document.getElementById('2311.00913v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00913v1-abstract-full" style="display: none;"> Language Models (LMs) pre-trained with self-supervision on large text corpora have become the default starting point for developing models for various NLP tasks. Once the pre-training corpus has been assembled, all data samples in the corpus are treated with equal importance during LM pre-training. However, due to varying levels of relevance and quality of data, equal importance to all the data samples may not be the optimal choice. While data reweighting has been explored in the context of task-specific supervised learning and LM fine-tuning, model-driven reweighting for pre-training data has not been explored. We fill this important gap and propose PRESENCE, a method for jointly reweighting samples by leveraging self-influence (SI) scores as an indicator of sample importance and pre-training. PRESENCE promotes novelty and stability for model pre-training. Through extensive analysis spanning multiple model sizes, datasets, and tasks, we present PRESENCE as an important first step in the research direction of sample reweighting for pre-training language models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00913v1-abstract-full').style.display = 'none'; document.getElementById('2311.00913v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.10567">arXiv:2309.10567</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.10567">pdf</a>, <a href="https://arxiv.org/format/2309.10567">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Modeling For Spoken Language Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bharadwaj%2C+S">Shikhar Bharadwaj</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Min Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Bapna%2C+A">Ankur Bapna</a>, <a href="/search/cs?searchtype=author&amp;query=Ganapathy%2C+S">Sriram Ganapathy</a>, <a href="/search/cs?searchtype=author&amp;query=Axelrod%2C+V">Vera Axelrod</a>, <a href="/search/cs?searchtype=author&amp;query=Dalmia%2C+S">Siddharth Dalmia</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+W">Wei Han</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=van+Esch%2C+D">Daan van Esch</a>, <a href="/search/cs?searchtype=author&amp;query=Ritchie%2C+S">Sandy Ritchie</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Riesa%2C+J">Jason Riesa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.10567v1-abstract-short" style="display: inline;"> Spoken language identification refers to the task of automatically predicting the spoken language in a given utterance. Conventionally, it is modeled as a speech-based language identification task. Prior techniques have been constrained to a single modality; however in the case of video data there is a wealth of other metadata that may be beneficial for this task. 
4. arXiv:2309.10567 [pdf, other] cs.CL, cs.LG, cs.SD, eess.AS
Multimodal Modeling For Spoken Language Identification
Authors: Shikhar Bharadwaj, Min Ma, Shikhar Vashishth, Ankur Bapna, Sriram Ganapathy, Vera Axelrod, Siddharth Dalmia, Wei Han, Yu Zhang, Daan van Esch, Sandy Ritchie, Partha Talukdar, Jason Riesa
Abstract: Spoken language identification refers to the task of automatically predicting the spoken language in a given utterance. Conventionally, it is modeled as a speech-based language identification task. Prior techniques have been constrained to a single modality; however, in the case of video data there is a wealth of other metadata that may be beneficial for this task. In this work, we propose MuSeLI, a Multimodal Spoken Language Identification method, which delves into the use of various metadata sources to enhance language identification. Our study reveals that metadata such as video title, description and geographic location provide substantial information to identify the spoken language of the multimedia recording. We conduct experiments using two diverse public datasets of YouTube videos, and obtain state-of-the-art results on the language identification task. We additionally conduct an ablation study that describes the distinct contribution of each modality for language recognition.
Submitted 19 September, 2023; originally announced September 2023.
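MuSeLI augments a speech-based language-ID model with video metadata such as title, description, and location. A bare-bones fusion sketch: the encoders are stubs, and simple concatenation of embeddings is one assumed way the modalities could be combined, not necessarily the paper's architecture.

```python
import torch
import torch.nn as nn

class MultimodalLangID(nn.Module):
    """Concatenate a speech embedding with a metadata-text embedding,
    then classify the spoken language."""

    def __init__(self, speech_dim: int, text_dim: int, n_languages: int):
        super().__init__()
        self.head = nn.Sequential(
            nn.Linear(speech_dim + text_dim, 256),
            nn.ReLU(),
            nn.Linear(256, n_languages),
        )

    def forward(self, speech_emb: torch.Tensor, meta_emb: torch.Tensor) -> torch.Tensor:
        return self.head(torch.cat([speech_emb, meta_emb], dim=-1))

# Embeddings would come from a speech encoder and a text encoder, respectively.
logits = MultimodalLangID(512, 768, n_languages=100)(torch.randn(4, 512), torch.randn(4, 768))
```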
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.04374">arXiv:2306.04374</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.04374">pdf</a>, <a href="https://arxiv.org/format/2306.04374">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Label Aware Speech Representation Learning For Language Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Bharadwaj%2C+S">Shikhar Bharadwaj</a>, <a href="/search/cs?searchtype=author&amp;query=Ganapathy%2C+S">Sriram Ganapathy</a>, <a href="/search/cs?searchtype=author&amp;query=Bapna%2C+A">Ankur Bapna</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Min Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+W">Wei Han</a>, <a href="/search/cs?searchtype=author&amp;query=Axelrod%2C+V">Vera Axelrod</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.04374v1-abstract-short" style="display: inline;"> Speech representation learning approaches for non-semantic tasks such as language recognition have either explored supervised embedding extraction methods using a classifier model or self-supervised representation learning approaches using raw data. In this paper, we propose a novel framework of combining self-supervised representation learning with the language label information for the pre-train&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.04374v1-abstract-full').style.display = 'inline'; document.getElementById('2306.04374v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.04374v1-abstract-full" style="display: none;"> Speech representation learning approaches for non-semantic tasks such as language recognition have either explored supervised embedding extraction methods using a classifier model or self-supervised representation learning approaches using raw data. In this paper, we propose a novel framework of combining self-supervised representation learning with the language label information for the pre-training task. This framework, termed as Label Aware Speech Representation (LASR) learning, uses a triplet based objective function to incorporate language labels along with the self-supervised loss function. The speech representations are further fine-tuned for the downstream task. The language recognition experiments are performed on two public datasets - FLEURS and Dhwani. In these experiments, we illustrate that the proposed LASR framework improves over the state-of-the-art systems on language identification. 
We also report an analysis of the robustness of LASR approach to noisy/missing labels as well as its application to multi-lingual speech recognition tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.04374v1-abstract-full').style.display = 'none'; document.getElementById('2306.04374v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at Interspeech 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11938">arXiv:2305.11938</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.11938">pdf</a>, <a href="https://arxiv.org/format/2305.11938">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2023.findings-emnlp.125">10.18653/v1/2023.findings-emnlp.125 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> XTREME-UP: A User-Centric Scarce-Data Benchmark for Under-Represented Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Gutkin%2C+A">Alexander Gutkin</a>, <a href="/search/cs?searchtype=author&amp;query=Kale%2C+M">Mihir Kale</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+M">Min Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Nicosia%2C+M">Massimo Nicosia</a>, <a href="/search/cs?searchtype=author&amp;query=Rijhwani%2C+S">Shruti Rijhwani</a>, <a href="/search/cs?searchtype=author&amp;query=Riley%2C+P">Parker Riley</a>, <a href="/search/cs?searchtype=author&amp;query=Sarr%2C+J+A">Jean-Michel A. Sarr</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+N">Nitish Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Katanova%2C+A">Anna Katanova</a>, <a href="/search/cs?searchtype=author&amp;query=Kirov%2C+C">Christo Kirov</a>, <a href="/search/cs?searchtype=author&amp;query=Dickinson%2C+D+L">Dana L. Dickinson</a>, <a href="/search/cs?searchtype=author&amp;query=Roark%2C+B">Brian Roark</a>, <a href="/search/cs?searchtype=author&amp;query=Samanta%2C+B">Bidisha Samanta</a>, <a href="/search/cs?searchtype=author&amp;query=Tao%2C+C">Connie Tao</a>, <a href="/search/cs?searchtype=author&amp;query=Adelani%2C+D+I">David I. 
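LASR combines a self-supervised loss with a triplet-based term over language labels, pulling same-language utterances together and pushing different-language ones apart. A sketch of that combination, with the margin and mixing weight as assumed hyperparameters:

```python
import torch
import torch.nn.functional as F

def lasr_style_loss(emb: torch.Tensor, anchor: torch.Tensor, pos: torch.Tensor,
                    neg: torch.Tensor, ssl_loss: torch.Tensor,
                    margin: float = 0.2, alpha: float = 1.0) -> torch.Tensor:
    """Self-supervised loss plus a triplet term driven by language labels.
    `anchor`/`pos` index same-language utterances; `neg` a different language."""
    triplet = F.triplet_margin_loss(emb[anchor], emb[pos], emb[neg], margin=margin)
    return ssl_loss + alpha * triplet

emb = torch.randn(6, 128)  # utterance embeddings from the speech encoder
anchor, pos, neg = torch.tensor([0]), torch.tensor([1]), torch.tensor([2])
loss = lasr_style_loss(emb, anchor, pos, neg, ssl_loss=torch.tensor(1.3))
```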
6. arXiv:2305.11938 [pdf, other] cs.CL; doi: 10.18653/v1/2023.findings-emnlp.125
XTREME-UP: A User-Centric Scarce-Data Benchmark for Under-Represented Languages
Authors: Sebastian Ruder, Jonathan H. Clark, Alexander Gutkin, Mihir Kale, Min Ma, Massimo Nicosia, Shruti Rijhwani, Parker Riley, Jean-Michel A. Sarr, Xinyi Wang, John Wieting, Nitish Gupta, Anna Katanova, Christo Kirov, Dana L. Dickinson, Brian Roark, Bidisha Samanta, Connie Tao, David I. Adelani, Vera Axelrod, Isaac Caswell, Colin Cherry, Dan Garrette, Reeve Ingle, Melvin Johnson, et al. (2 additional authors not shown)
Abstract: Data scarcity is a crucial issue for the development of highly multilingual NLP systems. Yet for many under-represented languages (ULs) -- languages for which NLP research is particularly far behind in meeting user needs -- it is feasible to annotate small amounts of data. Motivated by this, we propose XTREME-UP, a benchmark defined by: its focus on the scarce-data scenario rather than zero-shot; its focus on user-centric tasks -- tasks with broad adoption by speakers of high-resource languages; and its focus on under-represented languages where this scarce-data scenario tends to be most realistic. XTREME-UP evaluates the capabilities of language models across 88 under-represented languages over 9 key user-centric technologies including ASR, OCR, MT, and information access tasks that are of general utility. We create new datasets for OCR, autocomplete, semantic parsing, and transliteration, and build on and refine existing datasets for other tasks. XTREME-UP provides a methodology for evaluating many modeling scenarios including text-only, multi-modal (vision, audio, and text), supervised parameter tuning, and in-context learning. We evaluate commonly used models on the benchmark. We release all code and scripts to train and evaluate models.
Submitted 24 May, 2023; v1 submitted 19 May, 2023; originally announced May 2023.
7. arXiv:2303.12860 [pdf, other] cs.CL, cs.AI
Salient Span Masking for Temporal Understanding
Authors: Jeremy R. Cole, Aditi Chaudhary, Bhuwan Dhingra, Partha Talukdar
Abstract: Salient Span Masking (SSM) has shown itself to be an effective strategy to improve closed-book question answering performance. SSM extends general masked language model pretraining by creating additional unsupervised training sentences that mask a single entity or date span, thus oversampling factual information. Despite the success of this paradigm, the span types and sampling strategies are relatively arbitrary and not widely studied for other tasks. Thus, we investigate SSM from the perspective of temporal tasks, where learning a good representation of various temporal expressions is important. To that end, we introduce Temporal Span Masking (TSM) intermediate training. First, we find that SSM alone improves the downstream performance on three temporal tasks by an avg. +5.8 points. Further, we are able to achieve additional improvements (avg. +0.29 points) by adding the TSM task. These comprise the new best reported results on the targeted tasks. Our analysis suggests that the effectiveness of SSM stems from the sentences chosen in the training data rather than the mask choice: sentences with entities frequently also contain temporal expressions. Nonetheless, the additional targeted spans of TSM can still improve performance, especially in a zero-shot context.
Submitted 22 March, 2023; originally announced March 2023.
Comments: 5 pages, 1 figure; to appear in EACL 2023
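Salient Span Masking builds extra pretraining examples by masking a single entity or date span per sentence; Temporal Span Masking targets temporal expressions specifically. A small sketch of example construction, assuming spans have already been identified by a tagger (the sentinel token and span format are placeholders, not the papers' exact setup):

```python
import random

def salient_span_examples(sentence: str, spans: list[tuple[int, int]],
                          mask: str = "<extra_id_0>"):
    """Yield one (input, target) pair per salient span, masking a single
    entity/date span at a time, as in SSM-style pretraining."""
    for start, end in spans:
        yield sentence[:start] + mask + sentence[end:], sentence[start:end]

sentence = "The treaty was signed in Paris on 10 May 1871."
spans = [(25, 30), (34, 45)]  # "Paris" (entity), "10 May 1871" (date)
src, tgt = random.choice(list(salient_span_examples(sentence, spans)))
```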
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.12860v1-abstract-full').style.display = 'none'; document.getElementById('2303.12860v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages 1 figure, to appear in EACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.11206">arXiv:2211.11206</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.11206">pdf</a>, <a href="https://arxiv.org/format/2211.11206">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Cultural Re-contextualization of Fairness Research in Language Technologies in India </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+S">Shaily Bhatt</a>, <a href="/search/cs?searchtype=author&amp;query=Dev%2C+S">Sunipa Dev</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Dave%2C+S">Shachi Dave</a>, <a href="/search/cs?searchtype=author&amp;query=Prabhakaran%2C+V">Vinodkumar Prabhakaran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.11206v1-abstract-short" style="display: inline;"> Recent research has revealed undesirable biases in NLP data and models. However, these efforts largely focus on social disparities in the West, and are not directly portable to other geo-cultural contexts. In this position paper, we outline a holistic research agenda to re-contextualize NLP fairness research for the Indian context, accounting for Indian societal context, bridging technological gap&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.11206v1-abstract-full').style.display = 'inline'; document.getElementById('2211.11206v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.11206v1-abstract-full" style="display: none;"> Recent research has revealed undesirable biases in NLP data and models. However, these efforts largely focus on social disparities in the West, and are not directly portable to other geo-cultural contexts. In this position paper, we outline a holistic research agenda to re-contextualize NLP fairness research for the Indian context, accounting for Indian societal context, bridging technological gaps in capability and resources, and adapting to Indian cultural values. 
We also summarize findings from an empirical study on various social biases along different axes of disparities relevant to India, demonstrating their prevalence in corpora and models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.11206v1-abstract-full').style.display = 'none'; document.getElementById('2211.11206v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS Workshop on &#34;Cultures in AI/AI in Culture&#34;. This is a non-archival short version, to cite please refer to our complete paper: arXiv:2209.12226</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.07615">arXiv:2211.07615</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.07615">pdf</a>, <a href="https://arxiv.org/format/2211.07615">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> UGIF: UI Grounded Instruction Following </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Venkatesh%2C+S+G">Sagar Gubbi Venkatesh</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanan%2C+S">Srini Narayanan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.07615v2-abstract-short" style="display: inline;"> Smartphone users often find it difficult to navigate myriad menus to perform common tasks such as &#34;How to block calls from unknown numbers?&#34;. Currently, help documents with step-by-step instructions are manually written to aid the user. The user experience can be further enhanced by grounding the instructions in the help document to the UI and overlaying a tutorial on the phone UI. To build such t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.07615v2-abstract-full').style.display = 'inline'; document.getElementById('2211.07615v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.07615v2-abstract-full" style="display: none;"> Smartphone users often find it difficult to navigate myriad menus to perform common tasks such as &#34;How to block calls from unknown numbers?&#34;. Currently, help documents with step-by-step instructions are manually written to aid the user. The user experience can be further enhanced by grounding the instructions in the help document to the UI and overlaying a tutorial on the phone UI. To build such tutorials, several natural language processing components including retrieval, parsing, and grounding are necessary, but there isn&#39;t any relevant dataset for such a task. 
9. arXiv:2211.07615 [pdf, other] cs.CL
UGIF: UI Grounded Instruction Following
Authors: Sagar Gubbi Venkatesh, Partha Talukdar, Srini Narayanan
Abstract: Smartphone users often find it difficult to navigate myriad menus to perform common tasks such as "How to block calls from unknown numbers?". Currently, help documents with step-by-step instructions are manually written to aid the user. The user experience can be further enhanced by grounding the instructions in the help document to the UI and overlaying a tutorial on the phone UI. To build such tutorials, several natural language processing components, including retrieval, parsing, and grounding, are necessary, but there isn't any relevant dataset for such a task. Thus, we introduce UGIF-DataSet, a multi-lingual, multi-modal UI grounded dataset for step-by-step task completion on the smartphone containing 4,184 tasks across 8 languages. As an initial approach to this problem, we propose retrieving the relevant instruction steps based on the user's query and parsing the steps using Large Language Models (LLMs) to generate macros that can be executed on-device. The instruction steps are often available only in English, so the challenge includes cross-modal, cross-lingual retrieval of English how-to pages from user queries in many languages and mapping English instruction steps to UI in a potentially different language. We compare the performance of different LLMs, including PaLM and GPT-3, and find that the end-to-end task completion rate is 48% for English UI but drops to 32% for other languages. We analyze the common failure modes of existing models on this task and point out areas for improvement.
Submitted 23 May, 2023; v1 submitted 14 November, 2022; originally announced November 2022.
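The UGIF pipeline parses retrieved how-to steps with an LLM into macros executable on-device. The sketch below only shows the shape of such a prompt-and-parse step; the macro vocabulary (TAP/TOGGLE) and prompt wording are invented for illustration, not the paper's actual format.

```python
# Hypothetical prompt: convert each instruction step into one macro.
PROMPT = """Convert each instruction step into one macro: TAP(<label>) or TOGGLE(<label>).

Steps:
1. Open Settings
2. Tap "Blocked numbers"
3. Turn on "Unknown"

Macros:"""

def parse_macros(llm_output: str) -> list[str]:
    """Keep only lines that look like well-formed macros."""
    return [ln.strip() for ln in llm_output.splitlines()
            if ln.strip().startswith(("TAP(", "TOGGLE("))]

# e.g. parse_macros('TAP(Settings)\nTAP(Blocked numbers)\nTOGGLE(Unknown)')
```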
10. arXiv:2210.07313 [pdf, other] cs.CL, cs.LG
Bootstrapping Multilingual Semantic Parsers using Large Language Models
Authors: Abhijeet Awasthi, Nitish Gupta, Bidisha Samanta, Shachi Dave, Sunita Sarawagi, Partha Talukdar
Abstract: Despite the cross-lingual generalization demonstrated by pre-trained multilingual models, the translate-train paradigm of transferring English datasets across multiple languages remains a key mechanism for training task-specific multilingual models. However, for many low-resource languages, the availability of a reliable translation service entails significant amounts of costly human-annotated translation pairs. Further, translation services may continue to be brittle due to domain mismatch between task-specific input text and general-purpose text used for training translation models. For multilingual semantic parsing, we demonstrate the effectiveness and flexibility offered by large language models (LLMs) for translating English datasets into several languages via few-shot prompting. Through extensive comparisons on two public datasets, MTOP and MASSIVE, spanning 50 languages and several domains, we show that our method of translating data using LLMs outperforms a strong translate-train baseline on 41 out of 50 languages. We study the key design choices that enable more effective multilingual data translation via prompted LLMs.
Submitted 11 February, 2023; v1 submitted 13 October, 2022; originally announced October 2022.
Comments: EACL-23
We evaluate the resulting system, which we call TwiRGCN, on TimeQuestions, a recently released, challenging dataset for multi-hop complex temporal QA. We show that TwiRGCN significantly outperforms state-of-the-art systems on this dataset across diverse question types. Notably, TwiRGCN improves accuracy by 9--10 percentage points for the most difficult ordinal and implicit question types. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.06281v2-abstract-full').style.display = 'none'; document.getElementById('2210.06281v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages + references + appendix</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2023), pages 2049&ndash;2060 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.12226">arXiv:2209.12226</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.12226">pdf</a>, <a href="https://arxiv.org/format/2209.12226">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Re-contextualizing Fairness in NLP: The Case of India </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bhatt%2C+S">Shaily Bhatt</a>, <a href="/search/cs?searchtype=author&amp;query=Dev%2C+S">Sunipa Dev</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Dave%2C+S">Shachi Dave</a>, <a href="/search/cs?searchtype=author&amp;query=Prabhakaran%2C+V">Vinodkumar Prabhakaran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.12226v5-abstract-short" style="display: inline;"> Recent research has revealed undesirable biases in NLP data and models. However, these efforts focus on social disparities in the West, and are not directly portable to other geo-cultural contexts. In this paper, we focus on NLP fairness in the context of India. We start with a brief account of the prominent axes of social disparities in India.
We build resources for fairness evaluation in the Indian&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12226v5-abstract-full').style.display = 'inline'; document.getElementById('2209.12226v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.12226v5-abstract-full" style="display: none;"> Recent research has revealed undesirable biases in NLP data and models. However, these efforts focus on social disparities in the West, and are not directly portable to other geo-cultural contexts. In this paper, we focus on NLP fairness in the context of India. We start with a brief account of the prominent axes of social disparities in India. We build resources for fairness evaluation in the Indian context and use them to demonstrate prediction biases along some of the axes. We then delve deeper into social stereotypes for Region and Religion, demonstrating their prevalence in corpora and models. Finally, we outline a holistic research agenda to re-contextualize NLP fairness research for the Indian context, accounting for Indian societal context, bridging technological gaps in NLP capabilities and resources, and adapting to Indian cultural values. While we focus on India, this framework can be generalized to other geo-cultural contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12226v5-abstract-full').style.display = 'none'; document.getElementById('2209.12226v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to AACL-IJCNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.06767">arXiv:2209.06767</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.06767">pdf</a>, <a href="https://arxiv.org/format/2209.06767">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Parameter-Efficient Finetuning for Robust Continual Multilingual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Badola%2C+K">Kartikeya Badola</a>, <a href="/search/cs?searchtype=author&amp;query=Dave%2C+S">Shachi Dave</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.06767v3-abstract-short" style="display: inline;"> We introduce and study the problem of Continual Multilingual Learning (CML) where a previously trained multilingual model is periodically updated using new data arriving in stages.
If the new data is present only in a subset of languages, we find that the resulting model shows improved performance only on the languages included in the latest update (and a few closely related languages) while its p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06767v3-abstract-full').style.display = 'inline'; document.getElementById('2209.06767v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.06767v3-abstract-full" style="display: none;"> We introduce and study the problem of Continual Multilingual Learning (CML) where a previously trained multilingual model is periodically updated using new data arriving in stages. If the new data is present only in a subset of languages, we find that the resulting model shows improved performance only on the languages included in the latest update (and a few closely related languages) while its performance on all the remaining languages degrades significantly. We address this challenge by proposing LAFT-URIEL, a parameter-efficient finetuning strategy which aims to increase the number of languages on which the model improves after an update, while reducing the magnitude of loss in performance for the remaining languages. LAFT-URIEL uses linguistic knowledge to balance overfitting and knowledge sharing across languages, allowing for an additional 25% of task languages to see an improvement in performance after an update, while also reducing the average magnitude of losses on the remaining languages by 78% relative. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06767v3-abstract-full').style.display = 'none'; document.getElementById('2209.06767v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022.
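<p>One plausible reading of the balancing in LAFT-URIEL, sketched below: an update is shared with a language's adapter in proportion to that language's typological closeness to the languages present in the update. The <code>uriel_vector</code> lookup is a hypothetical stand-in for URIEL typological features; the actual strategy is more involved.</p>
<pre><code>
import numpy as np

def language_similarity(lang_a, lang_b, uriel_vector):
    # Cosine similarity between typological feature vectors.
    a, b = uriel_vector(lang_a), uriel_vector(lang_b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def adapter_update_scale(update_langs, adapter_lang, uriel_vector):
    # Share more of an update with adapters of closely related
    # languages, less with typologically distant ones.
    return max(language_similarity(l, adapter_lang, uriel_vector)
               for l in update_langs)
</code></pre>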
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ACL Findings 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12676">arXiv:2205.12676</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.12676">pdf</a>, <a href="https://arxiv.org/format/2205.12676">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating the Diversity, Equity and Inclusion of NLP Technology: A Case Study for Indian Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khanuja%2C+S">Simran Khanuja</a>, <a href="/search/cs?searchtype=author&amp;query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12676v3-abstract-short" style="display: inline;"> In order for NLP technology to be widely applicable, fair, and useful, it needs to serve a diverse set of speakers across the world&#39;s languages, be equitable, i.e., not unduly biased towards any particular language, and be inclusive of all users, particularly in low-resource settings where compute constraints are common. In this paper, we propose an evaluation paradigm that assesses NLP technologi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12676v3-abstract-full').style.display = 'inline'; document.getElementById('2205.12676v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12676v3-abstract-full" style="display: none;"> In order for NLP technology to be widely applicable, fair, and useful, it needs to serve a diverse set of speakers across the world&#39;s languages, be equitable, i.e., not unduly biased towards any particular language, and be inclusive of all users, particularly in low-resource settings where compute constraints are common. In this paper, we propose an evaluation paradigm that assesses NLP technologies across all three dimensions. While diversity and inclusion have received attention in recent literature, equity is currently unexplored. We propose to address this gap using the Gini coefficient, a well-established metric used for estimating societal wealth inequality. Using our paradigm, we highlight the distressed state of current technologies for Indian (IN) languages (a linguistically large and diverse set, with a varied speaker population), across all three dimensions. To improve upon these metrics, we demonstrate the importance of region-specific choices in model building and dataset creation, and more importantly, propose a novel, generalisable approach to optimal resource allocation during fine-tuning. Finally, we discuss steps to mitigate these biases and encourage the community to employ multi-faceted evaluation when building linguistically diverse and equitable technologies. 
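<p>The Gini coefficient the authors borrow for equity is easy to state concretely; the sketch below applies the standard mean-absolute-difference formulation to hypothetical per-language accuracies (the example numbers are illustrative, not from the paper).</p>
<pre><code>
import numpy as np

def gini(values):
    # Gini coefficient of non-negative values: 0 = perfect equality,
    # values near 1 = quality concentrated in a few languages.
    v = np.sort(np.asarray(values, dtype=float))
    n = v.size
    index = np.arange(1, n + 1)
    return float((2 * index - n - 1).dot(v) / (n * v.sum()))

print(gini([0.8, 0.8, 0.8]))   # 0.0: every language served equally
print(gini([0.9, 0.2, 0.1]))   # ~0.44: highly inequitable
</code></pre>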
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12676v3-abstract-full').style.display = 'none'; document.getElementById('2205.12676v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EACL Findings, 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.01976">arXiv:2203.01976</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.01976">pdf</a>, <a href="https://arxiv.org/format/2203.01976">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Overlap-based Vocabulary Generation Improves Cross-lingual Transfer Among Related Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Patil%2C+V">Vaidehi Patil</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Sarawagi%2C+S">Sunita Sarawagi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.01976v2-abstract-short" style="display: inline;"> Pre-trained multilingual language models such as mBERT and XLM-R have demonstrated great potential for zero-shot cross-lingual transfer to low web-resource languages (LRL). However, due to limited model capacity, the large difference in the sizes of available monolingual corpora between high web-resource languages (HRL) and LRLs does not provide enough scope of co-embedding the LRL with the HRL, t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.01976v2-abstract-full').style.display = 'inline'; document.getElementById('2203.01976v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.01976v2-abstract-full" style="display: none;"> Pre-trained multilingual language models such as mBERT and XLM-R have demonstrated great potential for zero-shot cross-lingual transfer to low web-resource languages (LRL). However, due to limited model capacity, the large difference in the sizes of available monolingual corpora between high web-resource languages (HRL) and LRLs does not provide enough scope of co-embedding the LRL with the HRL, thereby affecting downstream task performance of LRLs. In this paper, we argue that relatedness among languages in a language family along the dimension of lexical overlap may be leveraged to overcome some of the corpora limitations of LRLs. We propose Overlap BPE (OBPE), a simple yet effective modification to the BPE vocabulary generation algorithm which enhances overlap across related languages. 
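<p>A toy version of overlap-aware merge selection, not the released OBPE algorithm (whose exact objective differs): when picking the next BPE merge, candidate pairs that occur in both the high-resource and low-resource corpora earn extra credit, so shared tokens survive into the final vocabulary.</p>
<pre><code>
def next_merge(pair_freq_hrl, pair_freq_lrl, alpha=0.5):
    # pair_freq_*: dicts mapping a symbol pair to its corpus frequency.
    def score(pair):
        f_h = pair_freq_hrl.get(pair, 0)
        f_l = pair_freq_lrl.get(pair, 0)
        overlap = min(f_h, f_l)            # nonzero only for shared pairs
        return (1 - alpha) * (f_h + f_l) + alpha * overlap
    candidates = set(pair_freq_hrl) | set(pair_freq_lrl)
    return max(candidates, key=score)      # merge to apply next
</code></pre>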
Through extensive experiments on multiple NLP tasks and datasets, we observe that OBPE generates a vocabulary that increases the representation of LRLs via tokens shared with HRLs. This results in improved zero-shot transfer from related HRLs to LRLs without reducing HRL representation and accuracy. Unlike previous studies that dismissed the importance of token-overlap, we show that in the low-resource related language setting, token overlap matters. Synthetically reducing the overlap to zero can cause as much as a four-fold drop in zero-shot transfer accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.01976v2-abstract-full').style.display = 'none'; document.getElementById('2203.01976v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to appear at the ACL 2022 Main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.14782">arXiv:2110.14782</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.14782">pdf</a>, <a href="https://arxiv.org/format/2110.14782">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> When is BERT Multilingual? Isolating Crucial Ingredients for Cross-lingual Transfer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Deshpande%2C+A">Ameet Deshpande</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Narasimhan%2C+K">Karthik Narasimhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.14782v3-abstract-short" style="display: inline;"> While recent work on multilingual language models has demonstrated their capacity for cross-lingual zero-shot transfer on downstream tasks, there is a lack of consensus in the community as to what shared properties between languages enable such transfer. 
Analyses involving pairs of natural languages are often inconclusive and contradictory since languages simultaneously differ in many linguistic a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.14782v3-abstract-full').style.display = 'inline'; document.getElementById('2110.14782v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.14782v3-abstract-full" style="display: none;"> While recent work on multilingual language models has demonstrated their capacity for cross-lingual zero-shot transfer on downstream tasks, there is a lack of consensus in the community as to what shared properties between languages enable such transfer. Analyses involving pairs of natural languages are often inconclusive and contradictory since languages simultaneously differ in many linguistic aspects. In this paper, we perform a large-scale empirical study to isolate the effects of various linguistic properties by measuring zero-shot transfer between four diverse natural languages and their counterparts constructed by modifying aspects such as the script, word order, and syntax. Among other things, our experiments show that the absence of sub-word overlap significantly affects zero-shot transfer when languages differ in their word order, and there is a strong correlation between transfer performance and word embedding alignment between languages (e.g., R=0.94 on the task of NLI). Our results call for focus in multilingual models on explicitly improving word embedding alignment between languages rather than relying on its implicit emergence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.14782v3-abstract-full').style.display = 'none'; document.getElementById('2110.14782v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. 
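<p>The word-embedding alignment that the study correlates with transfer performance can be measured with a standard orthogonal-Procrustes fit over translation pairs; a minimal sketch under that assumption (the paper does not prescribe this exact procedure):</p>
<pre><code>
import numpy as np

def alignment_score(X, Y):
    # X, Y: [n, d] embeddings of row-aligned translation pairs.
    U, _, Vt = np.linalg.svd(X.T @ Y)
    W = U @ Vt                          # orthogonal map so that X @ W ~ Y
    Xm = X @ W
    cos = (Xm * Y).sum(1) / (np.linalg.norm(Xm, axis=1) *
                             np.linalg.norm(Y, axis=1))
    return float(cos.mean())            # higher = better-aligned spaces
</code></pre>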
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NAACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.07385">arXiv:2110.07385</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.07385">pdf</a>, <a href="https://arxiv.org/format/2110.07385">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Few-shot Controllable Style Transfer for Low-Resource Multilingual Settings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Krishna%2C+K">Kalpesh Krishna</a>, <a href="/search/cs?searchtype=author&amp;query=Nathani%2C+D">Deepak Nathani</a>, <a href="/search/cs?searchtype=author&amp;query=Garcia%2C+X">Xavier Garcia</a>, <a href="/search/cs?searchtype=author&amp;query=Samanta%2C+B">Bidisha Samanta</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.07385v2-abstract-short" style="display: inline;"> Style transfer is the task of rewriting a sentence into a target style while approximately preserving content. While most prior literature assumes access to a large style-labelled corpus, recent work (Riley et al. 2021) has attempted &#34;few-shot&#34; style transfer using only 3-10 sentences at inference for style extraction. In this work we study a relevant low-resource setting: style transfer for langu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.07385v2-abstract-full').style.display = 'inline'; document.getElementById('2110.07385v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.07385v2-abstract-full" style="display: none;"> Style transfer is the task of rewriting a sentence into a target style while approximately preserving content. While most prior literature assumes access to a large style-labelled corpus, recent work (Riley et al. 2021) has attempted &#34;few-shot&#34; style transfer using only 3-10 sentences at inference for style extraction. In this work we study a relevant low-resource setting: style transfer for languages where no style-labelled corpora are available. We notice that existing few-shot methods perform this task poorly, often copying inputs verbatim. We push the state-of-the-art for few-shot style transfer with a new method modeling the stylistic difference between paraphrases. When compared to prior work, our model achieves 2-3x better performance in formality transfer and code-mixing addition across seven languages. Moreover, our method is better at controlling the style transfer magnitude using an input scalar knob. We report promising qualitative results for several attribute transfer tasks (sentiment transfer, simplification, gender neutralization, text anonymization) all without retraining the model. 
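<p>The core modeling idea here, a stylistic direction estimated from paraphrase pairs and applied with a scalar knob, can be sketched as below. <code>encode</code> and <code>decode</code> are hypothetical seq2seq components; the actual model is trained end to end rather than assembled from frozen parts.</p>
<pre><code>
import numpy as np

def style_direction(encode, neutral_sents, styled_sents):
    # The mean difference between encodings of paraphrase pairs that
    # differ only in style approximates a "style vector".
    diffs = [encode(s) - encode(n) for n, s in zip(neutral_sents, styled_sents)]
    return np.mean(diffs, axis=0)

def transfer(encode, decode, sentence, direction, strength=1.0):
    # `strength` plays the role of the scalar knob controlling
    # the style transfer magnitude.
    return decode(encode(sentence) + strength * direction)
</code></pre>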
Finally, we find model evaluation to be difficult due to the lack of datasets and metrics for many languages. To facilitate future research we crowdsource formality annotations for 4000 sentence pairs in four Indic languages, and use this data to design our automatic evaluations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.07385v2-abstract-full').style.display = 'none'; document.getElementById('2110.07385v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2022 camera ready, 30 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.14364">arXiv:2109.14364</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.14364">pdf</a>, <a href="https://arxiv.org/format/2109.14364">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multilingual Fact Linking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kolluru%2C+K">Keshav Kolluru</a>, <a href="/search/cs?searchtype=author&amp;query=Rezk%2C+M">Martin Rezk</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.14364v2-abstract-short" style="display: inline;"> Knowledge-intensive NLP tasks can benefit from linking natural language text with facts from a Knowledge Graph (KG). Although facts themselves are language-agnostic, the fact labels (i.e., language-specific representation of the fact) in the KG are often present only in a few languages. This makes it challenging to link KG facts to sentences in languages other than the limited set of languages. To&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14364v2-abstract-full').style.display = 'inline'; document.getElementById('2109.14364v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.14364v2-abstract-full" style="display: none;"> Knowledge-intensive NLP tasks can benefit from linking natural language text with facts from a Knowledge Graph (KG). Although facts themselves are language-agnostic, the fact labels (i.e., language-specific representation of the fact) in the KG are often present only in a few languages. This makes it challenging to link KG facts to sentences in languages other than the limited set of languages. 
To address this problem, we introduce the task of Multilingual Fact Linking (MFL) where the goal is to link a fact expressed in a sentence to the corresponding fact in the KG, even when the fact label in the KG is not available in the language of the sentence. To facilitate research in this area, we present a new evaluation dataset, IndicLink. This dataset contains 11,293 linked WikiData facts and 6,429 sentences spanning English and six Indian languages. We propose a Retrieval+Generation model, ReFCoG, that can scale to millions of KG facts by combining Dual Encoder based retrieval with a Seq2Seq based generation model which is constrained to output only valid KG facts. ReFCoG outperforms standard Retrieval+Re-ranking models by 10.7 pts in Precision@1. In spite of this gain, the model achieves an overall score of 52.1, showing ample scope for improvement in the task. ReFCoG code and IndicLink data are available at https://github.com/SaiKeshav/mfl <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14364v2-abstract-full').style.display = 'none'; document.getElementById('2109.14364v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AKBC 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.12806">arXiv:2106.12806</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.12806">pdf</a>, <a href="https://arxiv.org/format/2106.12806">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> OKGIT: Open Knowledge Graph Link Prediction with Implicit Types </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chandrahas"> Chandrahas</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P+P">Partha Pratim Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.12806v1-abstract-short" style="display: inline;"> Open Knowledge Graphs (OpenKG) refer to a set of (head noun phrase, relation phrase, tail noun phrase) triples such as (tesla, return to, new york) extracted from a corpus using OpenIE tools. While OpenKGs are easy to bootstrap for a domain, they are very sparse and far from being directly usable in an end task.
Therefore, the task of predicting new facts, i.e., link prediction, becomes an importa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.12806v1-abstract-full').style.display = 'inline'; document.getElementById('2106.12806v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.12806v1-abstract-full" style="display: none;"> Open Knowledge Graphs (OpenKG) refer to a set of (head noun phrase, relation phrase, tail noun phrase) triples such as (tesla, return to, new york) extracted from a corpus using OpenIE tools. While OpenKGs are easy to bootstrap for a domain, they are very sparse and far from being directly usable in an end task. Therefore, the task of predicting new facts, i.e., link prediction, becomes an important step while using these graphs in downstream tasks such as text comprehension, question answering, and web search query recommendation. Learning embeddings for OpenKGs is one approach for link prediction that has received some attention lately. However, on careful examination, we found that current OpenKG link prediction algorithms often predict noun phrases (NPs) with incompatible types for given noun and relation phrases. We address this problem in this work and propose OKGIT that improves OpenKG link prediction using a novel type compatibility score and type regularization. With extensive experiments on multiple datasets, we show that the proposed method achieves state-of-the-art performance while producing type-compatible NPs in the link prediction task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.12806v1-abstract-full').style.display = 'none'; document.getElementById('2106.12806v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings of the ACL: ACL-IJCNLP 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.03958">arXiv:2106.03958</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.03958">pdf</a>, <a href="https://arxiv.org/format/2106.03958">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Language Relatedness for Low Web-Resource Language Model Adaptation: An Indic Languages Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khemchandani%2C+Y">Yash Khemchandani</a>, <a href="/search/cs?searchtype=author&amp;query=Mehtani%2C+S">Sarvesh Mehtani</a>, <a href="/search/cs?searchtype=author&amp;query=Patil%2C+V">Vaidehi Patil</a>, <a href="/search/cs?searchtype=author&amp;query=Awasthi%2C+A">Abhijeet Awasthi</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Sarawagi%2C+S">Sunita Sarawagi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.03958v2-abstract-short" style="display: inline;"> Recent research in multilingual language models (LM) has demonstrated their ability to effectively handle multiple languages in a single model. This holds promise for low web-resource languages (LRL) as multilingual models can enable transfer of supervision from high resource languages to LRLs. However, incorporating a new language in an LM still remains a challenge, particularly for languages wit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.03958v2-abstract-full').style.display = 'inline'; document.getElementById('2106.03958v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.03958v2-abstract-full" style="display: none;"> Recent research in multilingual language models (LM) has demonstrated their ability to effectively handle multiple languages in a single model. This holds promise for low web-resource languages (LRL) as multilingual models can enable transfer of supervision from high resource languages to LRLs. However, incorporating a new language in an LM still remains a challenge, particularly for languages with limited corpora and in unseen scripts. In this paper we argue that relatedness among languages in a language family may be exploited to overcome some of the corpora limitations of LRLs, and propose RelateLM. We focus on Indian languages, and exploit relatedness along two dimensions: (1) script (since many Indic scripts originated from the Brahmic script), and (2) sentence structure. RelateLM uses transliteration to convert the unseen script of limited LRL text into the script of a Related Prominent Language (RPL) (Hindi in our case). While exploiting similar sentence structures, RelateLM utilizes readily available bilingual dictionaries to pseudo translate RPL text into LRL corpora. 
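<p>The two augmentation steps RelateLM builds on, transliteration into the RPL script and dictionary-based pseudo-translation, can be sketched as below. <code>transliterate</code> and <code>bilingual_dict</code> are hypothetical stand-ins, and the word-by-word substitution leans on the assumed similarity in sentence structure between the two languages.</p>
<pre><code>
def pseudo_translate(rpl_sentence, bilingual_dict):
    # Word-by-word substitution from the Related Prominent Language
    # (e.g., Hindi) into the LRL, keeping the word order unchanged.
    return " ".join(bilingual_dict.get(w, w) for w in rpl_sentence.split())

def build_adaptation_corpus(lrl_texts, rpl_texts, bilingual_dict, transliterate):
    unified = [transliterate(t) for t in lrl_texts]    # move LRL to shared script
    pseudo = [pseudo_translate(t, bilingual_dict) for t in rpl_texts]
    return unified + pseudo                            # extra LM adaptation text
</code></pre>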
Experiments on multiple real-world benchmark datasets validate our hypothesis that using a related language as a pivot, along with transliteration- and pseudo-translation-based data augmentation, can be an effective way to adapt LMs for LRLs, rather than direct training or pivoting through English. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.03958v2-abstract-full').style.display = 'none'; document.getElementById('2106.03958v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL-IJCNLP 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.02834">arXiv:2106.02834</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.02834">pdf</a>, <a href="https://arxiv.org/format/2106.02834">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MergeDistill: Merging Pre-trained Language Models using Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khanuja%2C+S">Simran Khanuja</a>, <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.02834v1-abstract-short" style="display: inline;"> Pre-trained multilingual language models (LMs) have achieved state-of-the-art results in cross-lingual transfer, but they often lead to an inequitable representation of languages due to limited capacity, skewed pre-training data, and sub-optimal vocabularies. This has prompted the creation of an ever-growing pre-trained model universe, where each model is trained on large amounts of language or do&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02834v1-abstract-full').style.display = 'inline'; document.getElementById('2106.02834v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.02834v1-abstract-full" style="display: none;"> Pre-trained multilingual language models (LMs) have achieved state-of-the-art results in cross-lingual transfer, but they often lead to an inequitable representation of languages due to limited capacity, skewed pre-training data, and sub-optimal vocabularies. This has prompted the creation of an ever-growing pre-trained model universe, where each model is trained on large amounts of language or domain specific data with a carefully curated, linguistically informed vocabulary. However, doing so brings us back full circle and prevents one from leveraging the benefits of multilinguality.
To address the gaps at both ends of the spectrum, we propose MergeDistill, a framework to merge pre-trained LMs in a way that can best leverage their assets with minimal dependencies, using task-agnostic knowledge distillation. We demonstrate the applicability of our framework in a practical setting by leveraging pre-existing teacher LMs and training student LMs that perform competitively with or even outperform teacher LMs trained on several orders of magnitude more data and with a fixed model capacity. We also highlight the importance of teacher selection and its impact on student model performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02834v1-abstract-full').style.display = 'none'; document.getElementById('2106.02834v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2021 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.01751">arXiv:2106.01751</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.01751">pdf</a>, <a href="https://arxiv.org/format/2106.01751">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Reordering Examples Helps during Priming-based Few-Shot Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+S">Sawan Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.01751v1-abstract-short" style="display: inline;"> The ability to learn from limited data, or few-shot learning, is a desirable and often critical requirement for NLP systems. While many existing methods do poorly at learning from a handful of examples, large pretrained language models have recently been shown to be efficient few-shot learners. One approach to few-shot learning, which does not require finetuning of model parameters, is to augment&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01751v1-abstract-full').style.display = 'inline'; document.getElementById('2106.01751v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.01751v1-abstract-full" style="display: none;"> The ability to learn from limited data, or few-shot learning, is a desirable and often critical requirement for NLP systems. While many existing methods do poorly at learning from a handful of examples, large pretrained language models have recently been shown to be efficient few-shot learners. One approach to few-shot learning, which does not require finetuning of model parameters, is to augment the language model&#39;s input with priming text which is typically constructed using task specific descriptions and examples. 
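<p>Priming and the ordering search introduced next (PERO) can be sketched together. <code>score_fn</code> is a hypothetical callback that evaluates a candidate ordering on held-out examples, and the exhaustive search below stands in for the paper's more scalable search over permutations.</p>
<pre><code>
from itertools import permutations

def make_prompt(examples, query, sep="\n"):
    # Concatenate labelled examples and the query into priming text;
    # score_fn would build and evaluate prompts like this one.
    parts = [f"{text} {label}" for text, label in examples] + [query]
    return sep.join(parts)

def best_order(examples, score_fn):
    # Few-shot learning as search over permutations of the training
    # examples: keep the ordering whose prompt generalizes best.
    return max(permutations(examples), key=score_fn)
</code></pre>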
In this work, we further explore priming-based few-shot learning, with focus on using examples as prompts. We show that presenting examples in the right order is key for generalization. We introduce PERO (Prompting with Examples in the Right Order), where we formulate few-shot learning as search over the set of permutations of the training examples. We show that PERO can learn to generalize efficiently using as few as 10 examples, in contrast to existing approaches. While the newline token is a natural choice for separating the examples in the prompt, we show that learning a new separator token can potentially provide further gains in performance. We demonstrate the effectiveness of the proposed method on the tasks of sentiment classification, natural language inference and fact retrieval. Finally, we analyze the learned prompts to reveal novel insights, including the idea that two training examples in the right order alone can provide competitive performance for sentiment classification and natural language inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01751v1-abstract-full').style.display = 'none'; document.getElementById('2106.01751v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 1 figure, Accepted to Findings of ACL 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.01515">arXiv:2106.01515</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.01515">pdf</a>, <a href="https://arxiv.org/format/2106.01515">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Question Answering Over Temporal Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Saxena%2C+A">Apoorv Saxena</a>, <a href="/search/cs?searchtype=author&amp;query=Chakrabarti%2C+S">Soumen Chakrabarti</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.01515v1-abstract-short" style="display: inline;"> Temporal Knowledge Graphs (Temporal KGs) extend regular Knowledge Graphs by providing temporal scopes (start and end times) on each edge in the KG. While Question Answering over KG (KGQA) has received some attention from the research community, QA over Temporal KGs (Temporal KGQA) is a relatively unexplored area. 
Lack of broad coverage datasets has been another factor limiting progress in this are&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01515v1-abstract-full').style.display = 'inline'; document.getElementById('2106.01515v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.01515v1-abstract-full" style="display: none;"> Temporal Knowledge Graphs (Temporal KGs) extend regular Knowledge Graphs by providing temporal scopes (start and end times) on each edge in the KG. While Question Answering over KG (KGQA) has received some attention from the research community, QA over Temporal KGs (Temporal KGQA) is a relatively unexplored area. Lack of broad coverage datasets has been another factor limiting progress in this area. We address this challenge by presenting CRONQUESTIONS, the largest known Temporal KGQA dataset, clearly stratified into buckets of structural complexity. CRONQUESTIONS expands the only known previous dataset by a factor of 340x. We find that various state-of-the-art KGQA methods fall far short of the desired performance on this new dataset. In response, we also propose CRONKGQA, a transformer-based solution that exploits recent advances in Temporal KG embeddings, and achieves performance superior to all baselines, with an increase of 120% in accuracy over the next best performing method. Through extensive experiments, we give detailed insights into the workings of CRONKGQA, as well as situations where significant further improvements appear possible. In addition to the dataset, we have released our code as well. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.01515v1-abstract-full').style.display = 'none'; document.getElementById('2106.01515v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.10730">arXiv:2103.10730</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.10730">pdf</a>, <a href="https://arxiv.org/format/2103.10730">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MuRIL: Multilingual Representations for Indian Languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khanuja%2C+S">Simran Khanuja</a>, <a href="/search/cs?searchtype=author&amp;query=Bansal%2C+D">Diksha Bansal</a>, <a href="/search/cs?searchtype=author&amp;query=Mehtani%2C+S">Sarvesh Mehtani</a>, <a href="/search/cs?searchtype=author&amp;query=Khosla%2C+S">Savya Khosla</a>, <a href="/search/cs?searchtype=author&amp;query=Dey%2C+A">Atreyee Dey</a>, <a href="/search/cs?searchtype=author&amp;query=Gopalan%2C+B">Balaji Gopalan</a>, <a href="/search/cs?searchtype=author&amp;query=Margam%2C+D+K">Dilip Kumar Margam</a>, <a href="/search/cs?searchtype=author&amp;query=Aggarwal%2C+P">Pooja Aggarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Nagipogu%2C+R+T">Rajiv Teja Nagipogu</a>, <a href="/search/cs?searchtype=author&amp;query=Dave%2C+S">Shachi Dave</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+S">Shruti Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Gali%2C+S+C+B">Subhash Chandra Bose Gali</a>, <a href="/search/cs?searchtype=author&amp;query=Subramanian%2C+V">Vish Subramanian</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.10730v2-abstract-short" style="display: inline;"> India is a multilingual society with 1369 rationalized languages and dialects being spoken across the country (INDIA, 2011). Of these, the 22 scheduled languages have a staggering total of 1.17 billion speakers and 121 languages have more than 10,000 speakers (INDIA, 2011). India also has the second largest (and an ever growing) digital footprint (Statista, 2020). Despite this, today&#39;s state-of-th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.10730v2-abstract-full').style.display = 'inline'; document.getElementById('2103.10730v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.10730v2-abstract-full" style="display: none;"> India is a multilingual society with 1369 rationalized languages and dialects being spoken across the country (INDIA, 2011). Of these, the 22 scheduled languages have a staggering total of 1.17 billion speakers and 121 languages have more than 10,000 speakers (INDIA, 2011). India also has the second largest (and an ever growing) digital footprint (Statista, 2020). Despite this, today&#39;s state-of-the-art multilingual systems perform suboptimally on Indian (IN) languages. 
This can be explained by the fact that multilingual language models (LMs) are often trained on 100+ languages together, leading to a small representation of IN languages in their vocabulary and training data. Multilingual LMs are substantially less effective in resource-lean scenarios (Wu and Dredze, 2020; Lauscher et al., 2020), as limited data doesn&#39;t help capture the various nuances of a language. One also commonly observes IN language text transliterated to Latin or code-mixed with English, especially in informal settings (for example, on social media platforms) (Rijhwani et al., 2017). This phenomenon is not adequately handled by current state-of-the-art multilingual LMs. To address the aforementioned gaps, we propose MuRIL, a multilingual LM specifically built for IN languages. MuRIL is trained exclusively on large amounts of IN text corpora. We explicitly augment monolingual text corpora with both translated and transliterated document pairs that serve as supervised cross-lingual signals in training. MuRIL significantly outperforms multilingual BERT (mBERT) on all tasks in the challenging cross-lingual XTREME benchmark (Hu et al., 2020). We also present results on transliterated (native to Latin script) test sets of the chosen datasets and demonstrate the efficacy of MuRIL in handling transliterated data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.10730v2-abstract-full').style.display = 'none'; document.getElementById('2103.10730v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021.
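<p>The augmentation described above pairs each monolingual document with translated and transliterated counterparts as cross-lingual training signal; a schematic sketch, with <code>translate</code> and <code>transliterate</code> as hypothetical components standing in for whatever systems produce the parallel data:</p>
<pre><code>
def build_training_pairs(documents, translate, transliterate):
    # Each document yields a translated pair and a transliterated
    # (e.g., native script to Latin) pair as supervised signal.
    pairs = []
    for doc in documents:
        pairs.append((doc, translate(doc)))
        pairs.append((doc, transliterate(doc)))
    return pairs
</code></pre>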
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.13693">arXiv:2012.13693</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.13693">pdf</a>, <a href="https://arxiv.org/format/2012.13693">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Spatial Reasoning from Natural Language Instructions for Robot Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Venkatesh%2C+S+G">Sagar Gubbi Venkatesh</a>, <a href="/search/cs?searchtype=author&amp;query=Biswas%2C+A">Anirban Biswas</a>, <a href="/search/cs?searchtype=author&amp;query=Upadrashta%2C+R">Raviteja Upadrashta</a>, <a href="/search/cs?searchtype=author&amp;query=Srinivasan%2C+V">Vikram Srinivasan</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Amrutur%2C+B">Bharadwaj Amrutur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.13693v2-abstract-short" style="display: inline;"> Robots that can manipulate objects in unstructured environments and collaborate with humans can benefit immensely by understanding natural language. We propose a pipelined architecture of two stages to perform spatial reasoning on the text input. All the objects in the scene are first localized, and then the instruction for the robot in natural language and the localized co-ordinates are mapped to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.13693v2-abstract-full').style.display = 'inline'; document.getElementById('2012.13693v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.13693v2-abstract-full" style="display: none;"> Robots that can manipulate objects in unstructured environments and collaborate with humans can benefit immensely by understanding natural language. We propose a pipelined architecture of two stages to perform spatial reasoning on the text input. All the objects in the scene are first localized, and then the instruction for the robot in natural language and the localized co-ordinates are mapped to the start and end co-ordinates corresponding to the locations where the robot must pick up and place the object respectively. We show that representing the localized objects by quantizing their positions to a binary grid is preferable to representing them as a list of 2D co-ordinates. We also show that attention improves generalization and can overcome biases in the dataset. The proposed method is used to pick-and-place playing cards using a robot arm. 
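<p>The preferred input representation from the abstract above, object positions quantized to a binary grid rather than listed as raw 2D co-ordinates, is straightforward to reproduce; the grid size below is an arbitrary assumption.</p>
<pre><code>
import numpy as np

def positions_to_grid(positions, grid_size=16):
    # Quantize normalized (x, y) positions in [0, 1] to a binary
    # occupancy grid, the form found preferable to co-ordinate lists.
    grid = np.zeros((grid_size, grid_size), dtype=np.int8)
    for x, y in positions:
        col = min(int(x * grid_size), grid_size - 1)
        row = min(int(y * grid_size), grid_size - 1)
        grid[row, col] = 1
    return grid
</code></pre>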
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.13693v2-abstract-full').style.display = 'none'; document.getElementById('2012.13693v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for ICRA 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.12116">arXiv:2005.12116</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2005.12116">pdf</a>, <a href="https://arxiv.org/format/2005.12116">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> NILE : Natural Language Inference with Faithful Natural Language Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+S">Sawan Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.12116v1-abstract-short" style="display: inline;"> The recent growth in the popularity and success of deep learning models on NLP classification tasks has accompanied the need for generating some form of natural language explanation of the predicted labels. Such generated natural language (NL) explanations are expected to be faithful, i.e., they should correlate well with the model&#39;s internal decision making. In this work, we focus on the task of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.12116v1-abstract-full').style.display = 'inline'; document.getElementById('2005.12116v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.12116v1-abstract-full" style="display: none;"> The recent growth in the popularity and success of deep learning models on NLP classification tasks has accompanied the need for generating some form of natural language explanation of the predicted labels. Such generated natural language (NL) explanations are expected to be faithful, i.e., they should correlate well with the model&#39;s internal decision making. In this work, we focus on the task of natural language inference (NLI) and address the following question: can we build NLI systems which produce labels with high accuracy, while also generating faithful explanations of its decisions? We propose Natural-language Inference over Label-specific Explanations (NILE), a novel NLI method which utilizes auto-generated label-specific NL explanations to produce labels along with its faithful explanation. We demonstrate NILE&#39;s effectiveness over previously reported methods through automated and human evaluation of the produced labels and explanations. 
arXiv:2005.12116 (abs: https://arxiv.org/abs/2005.12116, pdf: https://arxiv.org/pdf/2005.12116)
Subjects: cs.CL (Computation and Language)
Title: NILE: Natural Language Inference with Faithful Natural Language Explanations
Authors: Sawan Kumar, Partha Talukdar
Abstract: The recent growth in the popularity and success of deep learning models on NLP classification tasks has been accompanied by a need to generate some form of natural language explanation of the predicted labels. Such generated natural language (NL) explanations are expected to be faithful, i.e., they should correlate well with the model's internal decision making. In this work, we focus on the task of natural language inference (NLI) and address the following question: can we build NLI systems which produce labels with high accuracy, while also generating faithful explanations of their decisions? We propose Natural-language Inference over Label-specific Explanations (NILE), a novel NLI method which utilizes auto-generated label-specific NL explanations to produce labels along with their faithful explanations. We demonstrate NILE's effectiveness over previously reported methods through automated and human evaluation of the produced labels and explanations. Our evaluation of NILE also supports the claim that accurate systems capable of providing testable explanations of their decisions can be designed. We discuss the faithfulness of NILE's explanations in terms of the sensitivity of the decisions to the corresponding explanations. We argue that explicit evaluation of faithfulness, in addition to label and explanation accuracy, is an important step in evaluating a model's explanations. Further, we demonstrate that task-specific probes are necessary to establish such sensitivity.
Submitted: 25 May 2020; originally announced May 2020.
Comments: 13 pages, 3 figures, Accepted to ACL 2020
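NILE's label-specific design reduces to a small control flow: generate one candidate explanation per label, then score each (input, explanation) pair and return the winning label with its explanation. A skeletal sketch under that reading; the generator and scorer here are hypothetical stand-ins for the fine-tuned language models the paper uses:

```python
from dataclasses import dataclass
from typing import Callable, Dict, Tuple

LABELS = ("entailment", "contradiction", "neutral")

@dataclass
class NliExample:
    premise: str
    hypothesis: str

def nile_predict(example: NliExample,
                 generators: Dict[str, Callable[[NliExample], str]],
                 scorer: Callable[[NliExample, str], float]) -> Tuple[str, str]:
    """Generate one candidate explanation per label, score each
    (input, explanation) pair, and return the winning label together
    with the explanation that justified it."""
    explanations = {lbl: gen(example) for lbl, gen in generators.items()}
    scores = {lbl: scorer(example, expl) for lbl, expl in explanations.items()}
    label = max(scores, key=scores.get)
    return label, explanations[label]

# Toy stand-ins for the fine-tuned generators and explanation processor:
ex = NliExample("A dog is running.", "An animal is moving.")
gens = {lbl: (lambda e, l=lbl: f"'{e.hypothesis}' is {l} given '{e.premise}'")
        for lbl in LABELS}
print(nile_predict(ex, gens, scorer=lambda e, expl: float(len(expl))))
```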
arXiv:2005.09069 (abs: https://arxiv.org/abs/2005.09069, pdf: https://arxiv.org/pdf/2005.09069)
Subjects: cs.CL (Computation and Language); cs.LG (Machine Learning)
Title: P-SIF: Document Embeddings Using Partition Averaging
Authors: Vivek Gupta, Ankit Saw, Pegah Nokhiz, Praneeth Netrapalli, Piyush Rai, Partha Talukdar
Abstract: Simple weighted averaging of word vectors often yields effective sentence representations which outperform sophisticated seq2seq neural models in many tasks. While it is desirable to use the same method to represent documents as well, the effectiveness is unfortunately lost when representing long documents involving multiple sentences. One of the key reasons is that a longer document is likely to contain words from many different topics; hence, creating a single vector while ignoring all the topical structure is unlikely to yield an effective document representation. This problem is less acute in single sentences and other short text fragments, where a single topic is most likely. To alleviate this problem, we present P-SIF, a partitioned word averaging model to represent long documents. P-SIF retains the simplicity of simple weighted word averaging while taking a document's topical structure into account. In particular, P-SIF learns topic-specific vectors from a document and finally concatenates them all to represent the overall document. We provide theoretical justifications for the correctness of P-SIF. Through a comprehensive set of experiments, we demonstrate P-SIF's effectiveness compared to simple weighted averaging and many other baselines.
Submitted: 18 May 2020; originally announced May 2020.
Comments: 15 pages, 3 figures, 13 tables, AAAI 2020. Blog: http://vivgupt.blogspot.com/2019/06/document-vector-estimation-using.html
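The partition-averaging idea is compact enough to state in a few lines of numpy: weight each word vector, average within each (soft) topic partition, and concatenate the per-topic averages. A sketch, with the SIF-style weights and topic assignments assumed to come from elsewhere (e.g., word frequencies and a clustering step); the normalization details are illustrative:

```python
import numpy as np

def psif_embed(word_vecs, word_weights, topic_probs):
    """Partitioned weighted averaging in the spirit of P-SIF.
    word_vecs:    (n_words, d) vectors for one document
    word_weights: (n_words,)   SIF-style weights, e.g. a / (a + p(w))
    topic_probs:  (n_words, k) soft topic assignments per word
    Returns a (k*d,) vector: one weighted average per topic, concatenated."""
    weighted = word_vecs * word_weights[:, None]        # (n, d)
    per_topic = topic_probs.T @ weighted                # (k, d)
    counts = topic_probs.sum(axis=0)[:, None] + 1e-8    # avoid division by zero
    return (per_topic / counts).reshape(-1)

rng = np.random.default_rng(0)
doc = psif_embed(rng.normal(size=(40, 50)),
                 rng.uniform(size=40),
                 rng.dirichlet(np.ones(5), size=40))
print(doc.shape)  # (250,) -- 5 topics x 50 dimensions
```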
arXiv:2005.08417 (abs: https://arxiv.org/abs/2005.08417, pdf: https://arxiv.org/pdf/2005.08417)
Subjects: cs.CL (Computation and Language)
Title: Syntax-guided Controlled Generation of Paraphrases
Authors: Ashutosh Kumar, Kabir Ahuja, Raghuram Vadapalli, Partha Talukdar
Abstract: Given a sentence (e.g., "I like mangoes") and a constraint (e.g., sentiment flip), the goal of controlled text generation is to produce a sentence that adapts the input sentence to meet the requirements of the constraint (e.g., "I hate mangoes"). Going beyond such simple constraints, recent works have started exploring the incorporation of complex syntactic guidance as constraints in the task of controlled paraphrase generation. In these methods, syntactic guidance is sourced from a separate exemplar sentence. However, these prior works have only utilized the limited syntactic information available in the parse tree of the exemplar sentence. We address this limitation in the paper and propose Syntax Guided Controlled Paraphraser (SGCP), an end-to-end framework for syntactic paraphrase generation. We find that SGCP can generate syntax-conforming sentences without compromising on relevance. We perform extensive automated and human evaluations over multiple real-world English language datasets to demonstrate the efficacy of SGCP over state-of-the-art baselines. To drive future research, we have made SGCP's source code available.
Submitted: 17 May 2020; originally announced May 2020.
Comments: 16 pages, 3 figures, Accepted to TACL 2020

arXiv:1911.07979 (abs: https://arxiv.org/abs/1911.07979, pdf: https://arxiv.org/pdf/1911.07979)
Subjects: cs.LG (Machine Learning); stat.ML (Machine Learning)
Title: ASAP: Adaptive Structure Aware Pooling for Learning Hierarchical Graph Representations
Authors: Ekagra Ranjan, Soumya Sanyal, Partha Pratim Talukdar
Abstract: Graph Neural Networks (GNNs) have been shown to work effectively for modeling graph-structured data to solve tasks such as node classification, link prediction and graph classification. There has been some recent progress in defining the notion of pooling in graphs, whereby the model tries to generate a graph-level representation by downsampling and summarizing the information present in the nodes. Existing pooling methods either fail to effectively capture the graph substructure or do not easily scale to large graphs. In this work, we propose ASAP (Adaptive Structure Aware Pooling), a sparse and differentiable pooling method that addresses the limitations of previous graph pooling architectures. ASAP utilizes a novel self-attention network along with a modified GNN formulation to capture the importance of each node in a given graph. It also learns a sparse soft cluster assignment for nodes at each layer to effectively pool the subgraphs to form the pooled graph. Through extensive experiments on multiple datasets and theoretical analysis, we motivate our choice of the components used in ASAP. Our experimental results show that combining existing GNN architectures with ASAP leads to state-of-the-art results on multiple graph classification benchmarks. ASAP has an average improvement of 4% over the current sparse hierarchical state-of-the-art method.
Submitted: 2 February 2020; v1 submitted 18 November 2019; originally announced November 2019.
Comments: The Thirty-Fourth AAAI Conference on Artificial Intelligence (AAAI 2020)
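A heavily stripped-down sketch of the pooling pattern the abstract describes: form a soft cluster around every node, score the clusters, keep the top fraction, and coarsen the adjacency. The plain neighborhood averaging and linear fitness score below stand in for ASAP's self-attention network and its modified GNN scorer; this illustrates the pattern, not the published method:

```python
import numpy as np

def asap_style_pool(x, adj, ratio=0.5, w=None):
    """x: (n, d) node features; adj: (n, n) {0,1} adjacency with self-loops.
    Returns pooled node features and pooled adjacency."""
    n, d = x.shape
    w = w if w is not None else np.ones(d) / d
    # Soft cluster around each node: normalized neighborhood average.
    assign = adj / adj.sum(axis=1, keepdims=True)   # (n, n) soft membership
    clusters = assign @ x                           # (n, d) cluster features
    scores = clusters @ w                           # stand-in for the fitness score
    k = max(1, int(ratio * n))
    keep = np.argsort(scores)[-k:]                  # keep the top-k clusters
    # Pooled features are fitness-scaled cluster features; pooled adjacency
    # connects clusters whose members shared an edge in the original graph.
    x_pool = clusters[keep] * scores[keep, None]
    a_pool = (assign[keep] @ adj @ assign[keep].T > 0).astype(float)
    return x_pool, a_pool

rng = np.random.default_rng(0)
a = (rng.uniform(size=(6, 6)) > 0.6).astype(float)
a = np.clip(a + a.T + np.eye(6), 0, 1)              # symmetric, self-loops
xp, ap = asap_style_pool(rng.normal(size=(6, 4)), a)
print(xp.shape, ap.shape)                           # (3, 4) (3, 3)
```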
arXiv:1911.07918 (abs: https://arxiv.org/abs/1911.07918, pdf: https://arxiv.org/pdf/1911.07918)
Subjects: cs.CL (Computation and Language); cs.IR (Information Retrieval); cs.LG (Machine Learning)
Title: Improving Document Classification with Multi-Sense Embeddings
Authors: Vivek Gupta, Ankit Saw, Pegah Nokhiz, Harshit Gupta, Partha Talukdar
Abstract: Efficient representation of text documents is an important building block in many NLP tasks. Research on long text categorization has shown that simple weighted averaging of word vectors for sentence representation often outperforms more sophisticated neural models. The recently proposed Sparse Composite Document Vector (SCDV) (Mekala et al., 2017) extends this approach from sentences to documents using soft clustering over word vectors. However, SCDV disregards the multi-sense nature of words and also suffers from the curse of high dimensionality. In this work, we address these shortcomings and propose SCDV-MS. SCDV-MS utilizes multi-sense word embeddings and learns a lower-dimensional manifold. Through extensive experiments on multiple real-world datasets, we show that SCDV-MS embeddings outperform previous state-of-the-art embeddings on multi-class and multi-label text categorization tasks. Furthermore, SCDV-MS embeddings are more efficient than SCDV in terms of time and space complexity on textual classification tasks.
Submitted: 18 November 2019; originally announced November 2019.
Comments: 8 pages, 7 figures, 12 tables, under review at ECAI 2020
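One reason composite document vectors in this family stay cheap at classification time is aggressive sparsification: components with small magnitude are zeroed out, since a given document typically touches only a few word clusters. A minimal sketch of such a hard-thresholding step; the exact threshold rule here is an assumption, not the paper's formula:

```python
import numpy as np

def sparsify(doc_vec, percent=4.0):
    """Zero out components whose magnitude falls below a small fraction
    of the vector's largest magnitude, keeping the composite document
    vector sparse (threshold scheme assumed for illustration)."""
    t = percent / 100.0 * np.abs(doc_vec).max()
    out = doc_vec.copy()
    out[np.abs(out) < t] = 0.0
    return out

v = np.array([0.01, -0.5, 0.02, 0.8, -0.03])
print(sparsify(v))  # [ 0.  -0.5  0.   0.8  0. ]
```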
arXiv:1911.03903 (abs: https://arxiv.org/abs/1911.03903, pdf: https://arxiv.org/pdf/1911.03903)
Subjects: cs.CL (Computation and Language)
Title: A Re-evaluation of Knowledge Graph Completion Methods
Authors: Zhiqing Sun, Shikhar Vashishth, Soumya Sanyal, Partha Talukdar, Yiming Yang
Abstract: Knowledge Graph Completion (KGC) aims at automatically predicting missing links for large-scale knowledge graphs. A vast number of state-of-the-art KGC techniques have been published at top conferences in several research fields, including data mining, machine learning, and natural language processing. However, we notice that several recent papers report very high performance, largely outperforming previous state-of-the-art methods. In this paper, we find that this can be attributed to the inappropriate evaluation protocol used by these papers, and we propose a simple evaluation protocol to address the problem. The proposed protocol is robust to bias in the model, which can substantially affect the final results. We conduct extensive experiments and report the performance of several existing methods using our protocol. The reproducible code has been made publicly available.
Submitted: 8 July 2020; v1 submitted 10 November 2019; originally announced November 2019.
Comments: Accepted at ACL 2020
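The evaluation issue is easy to reproduce: when a model assigns identical scores to many candidate triples, placing the correct triple first among the ties inflates ranking metrics. A small sketch contrasting the protocols; the random tie-breaking is the behaviour the paper argues for:

```python
import numpy as np

def rank_with_ties(scores, true_idx, protocol="random", rng=None):
    """Rank of the correct candidate among all scored candidates.
    'top' places the true triple ahead of equal-scored ones (inflates MRR),
    'bottom' places it behind them, 'random' breaks ties uniformly."""
    rng = rng or np.random.default_rng()
    s = scores[true_idx]
    better = int((scores > s).sum())
    ties = int((scores == s).sum()) - 1   # ties excluding the true triple
    if protocol == "top":
        return better + 1
    if protocol == "bottom":
        return better + ties + 1
    return better + 1 + int(rng.integers(0, ties + 1))

# A degenerate model that scores everything 0 looks perfect under 'top':
scores = np.zeros(100)
print(rank_with_ties(scores, 7, "top"))     # always 1
print(rank_with_ties(scores, 7, "random"))  # uniform over 1..100
```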
arXiv:1911.03082 (abs: https://arxiv.org/abs/1911.03082, pdf: https://arxiv.org/pdf/1911.03082)
Subjects: cs.LG (Machine Learning); stat.ML (Machine Learning)
Title: Composition-based Multi-Relational Graph Convolutional Networks
Authors: Shikhar Vashishth, Soumya Sanyal, Vikram Nitin, Partha Talukdar
Abstract: Graph Convolutional Networks (GCNs) have recently been shown to be quite successful in modeling graph-structured data. However, the primary focus has been on handling simple undirected graphs. Multi-relational graphs are a more general and prevalent form of graphs, where each edge has a label and direction associated with it. Most of the existing approaches to handle such graphs suffer from over-parameterization and are restricted to learning representations of nodes only. In this paper, we propose CompGCN, a novel Graph Convolutional framework which jointly embeds both nodes and relations in a relational graph. CompGCN leverages a variety of entity-relation composition operations from Knowledge Graph Embedding techniques and scales with the number of relations. It also generalizes several of the existing multi-relational GCN methods. We evaluate our proposed method on multiple tasks such as node classification, link prediction, and graph classification, and achieve demonstrably superior results. We make the source code of CompGCN available to foster reproducible research.
Submitted: 18 January 2020; v1 submitted 8 November 2019; originally announced November 2019.
Comments: In Proceedings of ICLR 2020
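The composition idea can be sketched directly: a message along an edge combines the neighbor's entity vector with the relation's vector through an operator borrowed from knowledge-graph embedding methods, such as subtraction (TransE-style), element-wise product (DistMult-style), or circular correlation (HolE-style). A simplified one-layer illustration; the full model also transforms relation embeddings and distinguishes edge directions:

```python
import numpy as np

def compose(e, r, op="sub"):
    """Entity-relation composition operators used to build messages."""
    if op == "sub":
        return e - r
    if op == "mult":
        return e * r
    if op == "corr":  # circular correlation via FFT
        return np.real(np.fft.ifft(np.conj(np.fft.fft(e)) * np.fft.fft(r)))
    raise ValueError(op)

def compgcn_layer(node_emb, rel_emb, triples, w, op="sub"):
    """One simplified message-passing step: for each (s, r, o) edge, the
    object node aggregates compose(subject, relation) through a shared
    projection, then applies a nonlinearity."""
    out = np.zeros_like(node_emb)
    deg = np.full(len(node_emb), 1e-8)
    for s, r, o in triples:
        out[o] += compose(node_emb[s], rel_emb[r], op) @ w
        deg[o] += 1.0
    return np.tanh(out / deg[:, None])

rng = np.random.default_rng(0)
nodes, rels = rng.normal(size=(4, 8)), rng.normal(size=(2, 8))
w = rng.normal(size=(8, 8)) * 0.1
print(compgcn_layer(nodes, rels, [(0, 0, 1), (2, 1, 1), (3, 0, 0)], w).shape)  # (4, 8)
```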
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of ICLR 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.00219">arXiv:1911.00219</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1911.00219">pdf</a>, <a href="https://arxiv.org/format/1911.00219">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> InteractE: Improving Convolution-based Knowledge Graph Embeddings by Increasing Feature Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Sanyal%2C+S">Soumya Sanyal</a>, <a href="/search/cs?searchtype=author&amp;query=Nitin%2C+V">Vikram Nitin</a>, <a href="/search/cs?searchtype=author&amp;query=Agrawal%2C+N">Nilesh Agrawal</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.00219v3-abstract-short" style="display: inline;"> Most existing knowledge graphs suffer from incompleteness, which can be alleviated by inferring missing links based on known facts. One popular way to accomplish this is to generate low-dimensional embeddings of entities and relations, and use these to make inferences. ConvE, a recently proposed approach, applies convolutional filters on 2D reshapings of entity and relation embeddings in order to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.00219v3-abstract-full').style.display = 'inline'; document.getElementById('1911.00219v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.00219v3-abstract-full" style="display: none;"> Most existing knowledge graphs suffer from incompleteness, which can be alleviated by inferring missing links based on known facts. One popular way to accomplish this is to generate low-dimensional embeddings of entities and relations, and use these to make inferences. ConvE, a recently proposed approach, applies convolutional filters on 2D reshapings of entity and relation embeddings in order to capture rich interactions between their components. However, the number of interactions that ConvE can capture is limited. In this paper, we analyze how increasing the number of these interactions affects link prediction performance, and utilize our observations to propose InteractE. InteractE is based on three key ideas -- feature permutation, a novel feature reshaping, and circular convolution. Through extensive experiments, we find that InteractE outperforms state-of-the-art convolutional link prediction baselines on FB15k-237. Further, InteractE achieves an MRR score that is 9%, 7.5%, and 23% better than ConvE on the FB15k-237, WN18RR and YAGO3-10 datasets respectively. 
The results validate our central hypothesis -- that increasing feature interaction is beneficial to link prediction performance. We make the source code of InteractE available to encourage reproducible research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.00219v3-abstract-full').style.display = 'none'; document.getElementById('1911.00219v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AAAI 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.11861">arXiv:1906.11861</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.11861">pdf</a>, <a href="https://arxiv.org/format/1906.11861">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Relating Simple Sentence Representations in Deep Neural Networks and the Brain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jat%2C+S">Sharmistha Jat</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+H">Hao Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a>, <a href="/search/cs?searchtype=author&amp;query=Mitchell%2C+T">Tom Mitchell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.11861v1-abstract-short" style="display: inline;"> What is the relationship between sentence representations learned by deep recurrent models against those encoded by the brain? Is there any correspondence between hidden layers of these recurrent models and brain regions when processing sentences? Can these deep models be used to synthesize brain data which can then be utilized in other extrinsic tasks? We investigate these questions using sentenc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.11861v1-abstract-full').style.display = 'inline'; document.getElementById('1906.11861v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.11861v1-abstract-full" style="display: none;"> What is the relationship between sentence representations learned by deep recurrent models against those encoded by the brain? Is there any correspondence between hidden layers of these recurrent models and brain regions when processing sentences? Can these deep models be used to synthesize brain data which can then be utilized in other extrinsic tasks? We investigate these questions using sentences with simple syntax and semantics (e.g., The bone was eaten by the dog.). We consider multiple neural network architectures, including recently proposed ELMo and BERT. 
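Of the three ideas, circular convolution is the most self-contained: wrap-around padding lets filter windows span opposite borders of the reshaped embedding, increasing the number of heterogeneous interactions. A sketch; the alternating 1D interleave below is a simplification of the paper's checkered reshaping:

```python
import numpy as np

def circular_conv2d(x, kernel):
    """2D convolution with wrap-around (circular) padding, so cells on
    opposite borders of the reshaped embedding also interact."""
    kh, kw = kernel.shape
    ph, pw = kh // 2, kw // 2
    xp = np.pad(x, ((ph, ph), (pw, pw)), mode="wrap")
    h, w = x.shape
    out = np.empty_like(x, dtype=float)
    for i in range(h):
        for j in range(w):
            out[i, j] = (xp[i:i + kh, j:j + kw] * kernel).sum()
    return out

# Interleave entity and relation components before reshaping to 2D
# (a simplification of the checkered reshaping):
e, r = np.arange(8.), np.arange(8., 16.)
stack = np.empty(16)
stack[0::2], stack[1::2] = e, r
feat = circular_conv2d(stack.reshape(4, 4), np.ones((3, 3)) / 9)
print(feat.shape)  # (4, 4)
```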
arXiv:1906.11861 (abs: https://arxiv.org/abs/1906.11861, pdf: https://arxiv.org/pdf/1906.11861)
Subjects: cs.CL (Computation and Language)
Title: Relating Simple Sentence Representations in Deep Neural Networks and the Brain
Authors: Sharmistha Jat, Hao Tang, Partha Talukdar, Tom Mitchell
Abstract: What is the relationship between sentence representations learned by deep recurrent models and those encoded by the brain? Is there any correspondence between hidden layers of these recurrent models and brain regions when processing sentences? Can these deep models be used to synthesize brain data which can then be utilized in other extrinsic tasks? We investigate these questions using sentences with simple syntax and semantics (e.g., "The bone was eaten by the dog."). We consider multiple neural network architectures, including the recently proposed ELMo and BERT. We use magnetoencephalography (MEG) brain recording data collected from human subjects while they were reading these simple sentences. Overall, we find that BERT's activations correlate best with MEG brain data. We also find that the deep network representations can be used to generate brain data from new sentences to augment existing brain data. To the best of our knowledge, this is the first work showing that MEG brain recordings made while reading a word in a sentence can be used to distinguish earlier words in the sentence. Our exploration is also the first to use deep neural network representations to generate synthetic brain data and to show that doing so improves accuracy on a subsequent stimulus decoding task.
Submitted: 27 June 2019; originally announced June 2019.
Comments: Association for Computational Linguistics (ACL) 2019

arXiv:1902.02161 (abs: https://arxiv.org/abs/1902.02161, pdf: https://arxiv.org/pdf/1902.02161)
Subjects: cs.CL (Computation and Language)
Title: AD3: Attentive Deep Document Dater
Authors: Swayambhu Nath Ray, Shib Sankar Dasgupta, Partha Talukdar
Abstract: Knowledge of the creation date of documents facilitates several tasks such as summarization, event extraction, temporally focused information extraction, etc. Unfortunately, for most documents on the Web, the time-stamp metadata is either missing or cannot be trusted. Thus, predicting creation time from the document content itself is an important task. In this paper, we propose Attentive Deep Document Dater (AD3), an attention-based neural document dating system which utilizes both context and temporal information in documents in a flexible and principled manner. We perform extensive experimentation on multiple real-world datasets to demonstrate the effectiveness of AD3 over neural and non-neural baselines.
Submitted: 21 January 2019; originally announced February 2019.
Journal ref: DBLP:conf/emnlp/RayDT18 (2018)

arXiv:1902.00175 (abs: https://arxiv.org/abs/1902.00175, pdf: https://arxiv.org/pdf/1902.00175)
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: Dating Documents using Graph Convolution Networks
Authors: Shikhar Vashishth, Shib Sankar Dasgupta, Swayambhu Nath Ray, Partha Talukdar
Abstract: Document date is essential for many important tasks, such as document retrieval, summarization, event detection, etc. While existing approaches for these tasks assume accurate knowledge of the document date, this is not always available, especially for arbitrary documents from the Web. Document dating is a challenging problem which requires inference over the temporal structure of the document. Prior document dating systems have largely relied on handcrafted features while ignoring such document-internal structures. In this paper, we propose NeuralDater, a Graph Convolutional Network (GCN) based document dating approach which jointly exploits the syntactic and temporal graph structures of a document in a principled way. To the best of our knowledge, this is the first application of deep learning to the problem of document dating. Through extensive experiments on real-world datasets, we find that NeuralDater significantly outperforms the state-of-the-art baseline by 19% absolute (45% relative) accuracy points.
Submitted: 31 January 2019; originally announced February 2019.
Comments: Accepted at ACL 2018
Journal ref: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics 2018

arXiv:1902.00172 (abs: https://arxiv.org/abs/1902.00172, pdf: https://arxiv.org/pdf/1902.00172)
Subjects: cs.IR (Information Retrieval); cs.LG (Machine Learning)
DOI: 10.1145/3178876.3186030 (https://doi.org/10.1145/3178876.3186030)
Title: CESI: Canonicalizing Open Knowledge Bases using Embeddings and Side Information
Authors: Shikhar Vashishth, Prince Jain, Partha Talukdar
Abstract: Open Information Extraction (OpenIE) methods extract (noun phrase, relation phrase, noun phrase) triples from text, resulting in the construction of large Open Knowledge Bases (Open KBs). The noun phrases (NPs) and relation phrases in such Open KBs are not canonicalized, leading to the storage of redundant and ambiguous facts. Recent research has posed canonicalization of Open KBs as clustering over manually defined feature spaces. Manual feature engineering is expensive and often sub-optimal. To overcome this challenge, we propose Canonicalization using Embeddings and Side Information (CESI), a novel approach which performs canonicalization over learned embeddings of Open KBs. CESI extends recent advances in KB embedding by incorporating relevant NP and relation phrase side information in a principled manner. Through extensive experiments on multiple real-world datasets, we demonstrate CESI's effectiveness.
Submitted: 31 January 2019; originally announced February 2019.
Comments: Accepted at WWW 2018
Journal ref: International World Wide Web Conferences Steering Committee 2018
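Canonicalization over learned embeddings boils down to clustering: noun phrases whose embeddings are close become one canonical entity. CESI uses hierarchical agglomerative clustering; the greedy union-find over a thresholded cosine-similarity graph below is a simpler stand-in for illustration:

```python
import numpy as np

def canonicalize(phrases, emb, threshold=0.9):
    """Group phrases whose embedding cosine similarity exceeds a
    threshold; each connected component becomes one canonical entity."""
    emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
    sim = emb @ emb.T
    parent = list(range(len(phrases)))
    def find(i):                      # union-find with path halving
        while parent[i] != i:
            parent[i] = parent[parent[i]]
            i = parent[i]
        return i
    for i in range(len(phrases)):
        for j in range(i + 1, len(phrases)):
            if sim[i, j] >= threshold:
                parent[find(i)] = find(j)
    clusters = {}
    for i, p in enumerate(phrases):
        clusters.setdefault(find(i), []).append(p)
    return list(clusters.values())

# Toy example with hand-built embeddings: two underlying entities.
names = ["Barack Obama", "Obama", "President Obama", "NYC", "New York City"]
vecs = np.zeros((5, 16))
vecs[0, 0] = vecs[1, 0] = vecs[2, 0] = 1.0
vecs[3, 1] = vecs[4, 1] = 1.0
vecs[1, 2] = vecs[2, 3] = 0.2          # small phrase-specific variation
print(canonicalize(names, vecs))
```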
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at WWW 2018</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> International World Wide Web Conferences Steering Committee 2018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1901.08255">arXiv:1901.08255</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1901.08255">pdf</a>, <a href="https://arxiv.org/format/1901.08255">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Confidence-based Graph Convolutional Networks for Semi-Supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Yadav%2C+P">Prateek Yadav</a>, <a href="/search/cs?searchtype=author&amp;query=Bhandari%2C+M">Manik Bhandari</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1901.08255v2-abstract-short" style="display: inline;"> Predicting properties of nodes in a graph is an important problem with applications in a variety of domains. Graph-based Semi-Supervised Learning (SSL) methods aim to address this problem by labeling a small subset of the nodes as seeds and then utilizing the graph structure to predict label scores for the rest of the nodes in the graph. Recently, Graph Convolutional Networks (GCNs) have achieved&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.08255v2-abstract-full').style.display = 'inline'; document.getElementById('1901.08255v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1901.08255v2-abstract-full" style="display: none;"> Predicting properties of nodes in a graph is an important problem with applications in a variety of domains. Graph-based Semi-Supervised Learning (SSL) methods aim to address this problem by labeling a small subset of the nodes as seeds and then utilizing the graph structure to predict label scores for the rest of the nodes in the graph. Recently, Graph Convolutional Networks (GCNs) have achieved impressive performance on the graph-based SSL task. In addition to label scores, it is also desirable to have confidence scores associated with them. Unfortunately, confidence estimation in the context of GCN has not been previously explored. We fill this important gap in this paper and propose ConfGCN, which estimates labels scores along with their confidences jointly in GCN-based setting. ConfGCN uses these estimated confidences to determine the influence of one node on another during neighborhood aggregation, thereby acquiring anisotropic capabilities. 
arXiv:1812.04361 (abs: https://arxiv.org/abs/1812.04361, pdf: https://arxiv.org/pdf/1812.04361)
Subjects: cs.CL (Computation and Language)
Title: RESIDE: Improving Distantly-Supervised Neural Relation Extraction using Side Information
Authors: Shikhar Vashishth, Rishabh Joshi, Sai Suman Prayaga, Chiranjib Bhattacharyya, Partha Talukdar
Abstract: Distantly-supervised Relation Extraction (RE) methods train an extractor by automatically aligning relation instances in a Knowledge Base (KB) with unstructured text. In addition to relation instances, KBs often contain other relevant side information, such as aliases of relations (e.g., "founded" and "co-founded" are aliases for the relation founderOfCompany). RE models usually ignore such readily available side information. In this paper, we propose RESIDE, a distantly-supervised neural relation extraction method which utilizes additional side information from KBs for improved relation extraction. It uses entity type and relation alias information to impose soft constraints while predicting relations. RESIDE employs Graph Convolution Networks (GCNs) to encode syntactic information from text and improves performance even when limited side information is available. Through extensive experiments on benchmark datasets, we demonstrate RESIDE's effectiveness. We have made RESIDE's source code available to encourage reproducible research.
Submitted: 11 February 2019; v1 submitted 11 December 2018; originally announced December 2018.
Comments: 10 pages, 6 figures, EMNLP 2018
Journal ref: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing

arXiv:1811.05660 (abs: https://arxiv.org/abs/1811.05660, pdf: https://arxiv.org/pdf/1811.05660)
Subjects: cs.LG (Machine Learning); cond-mat.mtrl-sci (Materials Science); stat.ML (Machine Learning)
Title: MT-CGCNN: Integrating Crystal Graph Convolutional Neural Network with Multitask Learning for Material Property Prediction
Authors: Soumya Sanyal, Janakiraman Balachandran, Naganand Yadati, Abhishek Kumar, Padmini Rajagopalan, Suchismita Sanyal, Partha Talukdar
Abstract: Developing accurate, transferable and computationally inexpensive machine learning models can rapidly accelerate the discovery and development of new materials. Two of the major challenges in developing such models are (i) the limited availability of materials data compared to other fields, and (ii) the lack of a universal descriptor of materials for predicting their various properties. The limited availability of materials data can be addressed through transfer learning, while the need for a generic representation was recently addressed by Xie and Grossman [1], who developed a crystal graph convolutional neural network (CGCNN) that provides a unified representation of crystals. In this work, we develop a new model (MT-CGCNN) by integrating CGCNN with transfer learning based on multi-task (MT) learning. We demonstrate the effectiveness of MT-CGCNN by simultaneously predicting various material properties such as formation energy, band gap and Fermi energy for a wide range of inorganic crystals (46,774 materials). MT-CGCNN is able to reduce the test error by up to 8% when employed on correlated properties. The model's predictions have lower test error compared to CGCNN, even when the training data is reduced by 10%. We also demonstrate our model's better performance through prediction in an end-user scenario: metal/non-metal classification. These results encourage further development of machine learning approaches which leverage multi-task learning to address the aforementioned challenges in the discovery of new materials. We make MT-CGCNN's source code available to encourage reproducible research.
Submitted: 14 November 2018; originally announced November 2018.
Comments: NIPS Workshop on Machine Learning for Molecules and Materials
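The multi-task coupling amounts to training one shared crystal encoder against a weighted sum of per-property losses, so correlated targets like formation energy and band gap regularize each other. A minimal sketch of that loss; MSE and the weight values are illustrative choices, not the paper's exact configuration:

```python
import numpy as np

def multitask_loss(preds, targets, weights):
    """Weighted sum of per-property MSE losses over the outputs of a
    shared encoder: one (pred, target) pair per material property."""
    return sum(w * np.mean((p - t) ** 2)
               for w, p, t in zip(weights, preds, targets))

rng = np.random.default_rng(1)
# Three property heads (e.g., formation energy, band gap, Fermi energy)
preds = [rng.normal(size=32) for _ in range(3)]
targets = [rng.normal(size=32) for _ in range(3)]
print(multitask_loss(preds, targets, weights=[1.0, 1.0, 0.5]))
```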
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NIPS Workshop on Machine Learning for Molecules and Materials</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1809.04283">arXiv:1809.04283</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1809.04283">pdf</a>, <a href="https://arxiv.org/format/1809.04283">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Incorporating Syntactic and Semantic Information in Word Embeddings using Graph Convolutional Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vashishth%2C+S">Shikhar Vashishth</a>, <a href="/search/cs?searchtype=author&amp;query=Bhandari%2C+M">Manik Bhandari</a>, <a href="/search/cs?searchtype=author&amp;query=Yadav%2C+P">Prateek Yadav</a>, <a href="/search/cs?searchtype=author&amp;query=Rai%2C+P">Piyush Rai</a>, <a href="/search/cs?searchtype=author&amp;query=Bhattacharyya%2C+C">Chiranjib Bhattacharyya</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1809.04283v4-abstract-short" style="display: inline;"> Word embeddings have been widely adopted across several NLP applications. Most existing word embedding methods utilize sequential context of a word to learn its embedding. While there have been some attempts at utilizing syntactic context of a word, such methods result in an explosion of the vocabulary size. In this paper, we overcome this problem by proposing SynGCN, a flexible Graph Convolution&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.04283v4-abstract-full').style.display = 'inline'; document.getElementById('1809.04283v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1809.04283v4-abstract-full" style="display: none;"> Word embeddings have been widely adopted across several NLP applications. Most existing word embedding methods utilize sequential context of a word to learn its embedding. While there have been some attempts at utilizing syntactic context of a word, such methods result in an explosion of the vocabulary size. In this paper, we overcome this problem by proposing SynGCN, a flexible Graph Convolution based method for learning word embeddings. SynGCN utilizes the dependency context of a word without increasing the vocabulary size. Word embeddings learned by SynGCN outperform existing methods on various intrinsic and extrinsic tasks and provide an advantage when used with ELMo. We also propose SemGCN, an effective framework for incorporating diverse semantic knowledge for further enhancing learned word representations. We make the source code of both models available to encourage reproducible research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.04283v4-abstract-full').style.display = 'none'; document.getElementById('1809.04283v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 2 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 57th Annual Meeting of the Association for Computational Linguistics (ACL 2019) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1809.02589">arXiv:1809.02589</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1809.02589">pdf</a>, <a href="https://arxiv.org/format/1809.02589">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> HyperGCN: A New Method of Training Graph Convolutional Networks on Hypergraphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yadati%2C+N">Naganand Yadati</a>, <a href="/search/cs?searchtype=author&amp;query=Nimishakavi%2C+M">Madhav Nimishakavi</a>, <a href="/search/cs?searchtype=author&amp;query=Yadav%2C+P">Prateek Yadav</a>, <a href="/search/cs?searchtype=author&amp;query=Nitin%2C+V">Vikram Nitin</a>, <a href="/search/cs?searchtype=author&amp;query=Louis%2C+A">Anand Louis</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1809.02589v4-abstract-short" style="display: inline;"> In many real-world network datasets such as co-authorship, co-citation, email communication, etc., relationships are complex and go beyond pairwise. Hypergraphs provide a flexible and natural modeling tool to model such complex relationships. The obvious existence of such complex relationships in many real-world networks naturaly motivates the problem of learning with hypergraphs. A popular learni&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.02589v4-abstract-full').style.display = 'inline'; document.getElementById('1809.02589v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1809.02589v4-abstract-full" style="display: none;"> In many real-world network datasets such as co-authorship, co-citation, email communication, etc., relationships are complex and go beyond pairwise. Hypergraphs provide a flexible and natural modeling tool to model such complex relationships. The obvious existence of such complex relationships in many real-world networks naturaly motivates the problem of learning with hypergraphs. 
A popular learning paradigm is hypergraph-based semi-supervised learning (SSL), where the goal is to assign labels to initially unlabeled vertices in a hypergraph. Motivated by the fact that graph convolutional networks (GCNs) have been effective for graph-based SSL, we propose HyperGCN, a novel GCN for SSL on attributed hypergraphs. Additionally, we show how HyperGCN can be used as a learning-based approach for combinatorial optimisation on NP-hard hypergraph problems. We demonstrate HyperGCN's effectiveness through detailed experimentation on real-world hypergraphs.

Submitted 22 May, 2019; v1 submitted 7 September, 2018; originally announced September 2018.
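One way to make a hypergraph GCN-ready is to reduce each hyperedge to ordinary edges. The simplest such reduction, clique expansion, is sketched below on an invented toy hypergraph; HyperGCN itself uses a sparser, Laplacian-motivated reduction, so treat this only as the baseline shape of the idea.

```python
import numpy as np

n = 5
hyperedges = [{0, 1, 2}, {2, 3}, {1, 3, 4}]   # invented toy hypergraph

# Clique expansion: connect every vertex pair inside each hyperedge, so a
# standard GCN can run on the result. (HyperGCN instead keeps only a few
# carefully chosen edges per hyperedge.)
A = np.zeros((n, n))
for e in hyperedges:
    for u in e:
        for v in e:
            if u != v:
                A[u, v] = 1.0

print(int(A.sum()))   # 14 directed edges after expansion
```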
arXiv:1805.11365 (https://arxiv.org/abs/1805.11365) [pdf, other] cs.LG; stat.ML

Lovasz Convolutional Networks

Authors: Prateek Yadav, Madhav Nimishakavi, Naganand Yadati, Shikhar Vashishth, Arun Rajkumar, Partha Talukdar

Abstract: Semi-supervised learning on graph-structured data has received significant attention with the recent introduction of Graph Convolution Networks (GCNs). While traditional methods optimize a loss augmented with an explicit Laplacian regularization term, GCNs perform an implicit Laplacian-type regularization that captures local graph structure. In this work, we propose Lovasz Convolutional Networks (LCNs), which are capable of incorporating global graph properties. LCNs achieve this by utilizing Lovasz's orthonormal embeddings of the nodes. We analyse local and global properties of graphs and demonstrate settings where LCNs tend to work better than GCNs. We validate the proposed method on standard random graph models such as the stochastic block model (SBM) and certain community-structured graphs, where LCNs outperform GCNs and learn more intuitive embeddings. We also perform extensive binary and multi-class classification experiments on real-world datasets to demonstrate LCNs' effectiveness. In addition to simple graphs, we demonstrate the use of LCNs on hypergraphs by identifying settings where they are expected to work better than GCNs.

Submitted 3 January, 2019; v1 submitted 29 May, 2018; originally announced May 2018.
Comments: Accepted at AISTATS 2019
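For contrast with the implicit regularization mentioned above, the classical explicit Laplacian penalty on a score vector f is f^T L f, which sums (f_u - f_v)^2 over edges and so is small exactly when connected nodes agree. A quick numerical check on a made-up four-node graph:

```python
import numpy as np

# Made-up 4-node graph: two connected pairs, (0-1) and (2-3).
A = np.array([[0, 1, 0, 0],
              [1, 0, 0, 0],
              [0, 0, 0, 1],
              [0, 0, 1, 0]], dtype=float)
L = np.diag(A.sum(axis=1)) - A          # unnormalised graph Laplacian

smooth = np.array([1.0, 1.0, -1.0, -1.0])   # agrees along every edge
rough  = np.array([1.0, -1.0, 1.0, -1.0])   # disagrees along every edge

for f in (smooth, rough):
    # f^T L f equals the sum of (f_u - f_v)^2 over edges
    print(f @ L @ f)                    # 0.0 for smooth, 8.0 for rough
```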
arXiv:1804.06987 (https://arxiv.org/abs/1804.06987) [pdf, other] cs.CL

Improving Distantly Supervised Relation Extraction using Word and Entity Based Attention

Authors: Sharmistha Jat, Siddhesh Khandelwal, Partha Talukdar

Abstract: Relation extraction is the problem of classifying the relationship between two entities in a given sentence. Distant Supervision (DS) is a popular technique for developing relation extractors starting with limited supervision. We note that most sentences in the distant-supervision relation extraction setting are very long and may benefit from word attention for better sentence representation. Our contributions in this paper are threefold. Firstly, we propose two novel word attention models for distantly-supervised relation extraction, a Bi-directional Gated Recurrent Unit (Bi-GRU) based word attention model (BGWA) and an entity-centric attention model (EA), along with a combination model that combines multiple complementary models using a weighted voting method for improved relation extraction. Secondly, we introduce GDS, a new distant-supervision dataset for relation extraction. GDS removes the test-data noise present in all previous distant-supervision benchmark datasets, making credible automatic evaluation possible. Thirdly, through extensive experiments on multiple real-world datasets, we demonstrate the effectiveness of the proposed methods.

Submitted 18 April, 2018; originally announced April 2018.
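Word attention in this setting scores each token and takes the weighted sum as the sentence representation. A bare-bones numpy version, assuming the token vectors are already computed (in the paper they would come from a Bi-GRU):

```python
import numpy as np

def word_attention(H, w):
    """H: (n_tokens, d) token vectors; w: (d,) learned attention query.
    Returns a sentence vector that emphasises high-scoring tokens."""
    scores = np.exp(H @ w - (H @ w).max())   # stable softmax over tokens
    scores /= scores.sum()
    return scores @ H                        # weighted sum, shape (d,)

H = np.random.randn(12, 32)     # e.g. Bi-GRU outputs for a 12-token sentence
w = np.random.randn(32)
print(word_attention(H, w).shape)   # (32,)
```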
arXiv:1802.06371 (https://arxiv.org/abs/1802.06371) [pdf, other] cs.LG

Inductive Framework for Multi-Aspect Streaming Tensor Completion with Side Information

Authors: Madhav Nimishakavi, Bamdev Mishra, Manish Gupta, Partha Talukdar

Abstract: Low-rank tensor completion is a well-studied problem with applications in various fields. However, in many real-world applications the data is dynamic, i.e., new data arrives at different time intervals. As a result, the tensors used to represent the data grow in size. Besides the tensors, in many real-world scenarios side information is also available in the form of matrices, which likewise grow in size with time. The problem of predicting missing values in a dynamically growing tensor is called dynamic tensor completion. Most previous work on dynamic tensor completion assumes that the tensor grows in only one mode; to the best of our knowledge, no previous work incorporates side information into dynamic tensor completion. We bridge this gap by proposing a dynamic tensor completion framework called Side Information infused Incremental Tensor Analysis (SIITA), which incorporates side information and works for general incremental tensors. We also show how non-negative constraints can be incorporated into SIITA, which is essential for mining interpretable latent clusters. We carry out extensive experiments on multiple real-world datasets to demonstrate the effectiveness of SIITA in various settings.

Submitted 1 September, 2018; v1 submitted 18 February, 2018; originally announced February 2018.
Comments: Accepted to International Conference on Information and Knowledge Management (CIKM), 2018
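The objective behind low-rank completion is easiest to see in the matrix special case: fit only the observed entries and let the low-rank structure fill in the rest. A hypothetical toy, minimising ||M * (U V^T - X)||^2 by gradient descent (an illustration of the objective, not SIITA's incremental algorithm):

```python
import numpy as np

rng = np.random.default_rng(0)
n, m, r = 20, 15, 3
X_true = rng.normal(size=(n, r)) @ rng.normal(size=(r, m))  # exactly rank 3
M = rng.random((n, m)) < 0.5       # mask: roughly half the entries observed

U = rng.normal(scale=0.1, size=(n, r))
V = rng.normal(scale=0.1, size=(m, r))
lr = 0.01
for _ in range(5000):
    R = M * (U @ V.T - X_true)     # residual, zeroed on unobserved entries
    U, V = U - lr * R @ V, V - lr * R.T @ U

err = np.abs((U @ V.T - X_true)[~M]).mean()
print(f"mean abs error on held-out entries: {err:.3f}")  # should be small
```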
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to International Conference on Information and Knowledge Management (CIKM), 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1712.03547">arXiv:1712.03547</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1712.03547">pdf</a>, <a href="https://arxiv.org/ps/1712.03547">ps</a>, <a href="https://arxiv.org/format/1712.03547">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Inducing Interpretability in Knowledge Graph Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chandrahas"> Chandrahas</a>, <a href="/search/cs?searchtype=author&amp;query=Sengupta%2C+T">Tathagata Sengupta</a>, <a href="/search/cs?searchtype=author&amp;query=Pragadeesh%2C+C">Cibi Pragadeesh</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P+P">Partha Pratim Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1712.03547v1-abstract-short" style="display: inline;"> We study the problem of inducing interpretability in KG embeddings. Specifically, we explore the Universal Schema (Riedel et al., 2013) and propose a method to induce interpretability. There have been many vector space models proposed for the problem, however, most of these methods don&#39;t address the interpretability (semantics) of individual dimensions. In this work, we study this problem and prop&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.03547v1-abstract-full').style.display = 'inline'; document.getElementById('1712.03547v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1712.03547v1-abstract-full" style="display: none;"> We study the problem of inducing interpretability in KG embeddings. Specifically, we explore the Universal Schema (Riedel et al., 2013) and propose a method to induce interpretability. There have been many vector space models proposed for the problem, however, most of these methods don&#39;t address the interpretability (semantics) of individual dimensions. In this work, we study this problem and propose a method for inducing interpretability in KG embeddings using entity co-occurrence statistics. The proposed method significantly improves the interpretability, while maintaining comparable performance in other KG tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.03547v1-abstract-full').style.display = 'none'; document.getElementById('1712.03547v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2017. 
arXiv:1711.05401 (https://arxiv.org/abs/1711.05401) [pdf, other] cs.AI; cs.LG; stat.ML

Revisiting Simple Neural Networks for Learning Representations of Knowledge Graphs

Authors: Srinivas Ravishankar, Chandrahas, Partha Pratim Talukdar

Abstract: We address the problem of learning vector representations for entities and relations in Knowledge Graphs (KGs) for Knowledge Base Completion (KBC). This problem has received significant attention in the past few years and multiple methods have been proposed. Most existing methods use a predefined characteristic scoring function to evaluate the correctness of KG triples. These scoring functions distinguish correct triples (high score) from incorrect ones (low score). However, their performance varies across datasets. In this work, we demonstrate that a simple neural-network-based scoring function can consistently achieve near state-of-the-art performance on multiple datasets. We also quantitatively demonstrate biases in standard benchmark datasets, and highlight the need for evaluation spanning various datasets.

Submitted 8 January, 2018; v1 submitted 14 November, 2017; originally announced November 2017.
Comments: 7 pages, submitted to and accepted in Automated Knowledge Base Construction (AKBC) Workshop 2017, at NIPS 2017
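A "simple neural-network-based scoring function" can be read as: replace a fixed form such as TransE's -||h + r - t|| with a small MLP over the embeddings. A hedged PyTorch sketch with arbitrary sizes (NeuralTripleScorer is a hypothetical name, not the paper's model):

```python
import torch
import torch.nn as nn

class NeuralTripleScorer(nn.Module):
    """Score (head, relation, tail) with an MLP instead of a fixed formula."""
    def __init__(self, n_entities, n_relations, d=50):
        super().__init__()
        self.ent = nn.Embedding(n_entities, d)
        self.rel = nn.Embedding(n_relations, d)
        self.mlp = nn.Sequential(nn.Linear(3 * d, d), nn.ReLU(),
                                 nn.Linear(d, 1))

    def forward(self, h, r, t):
        x = torch.cat([self.ent(h), self.rel(r), self.ent(t)], dim=-1)
        return self.mlp(x).squeeze(-1)   # higher score = more plausible

scorer = NeuralTripleScorer(n_entities=1000, n_relations=20)
h, r, t = torch.tensor([3, 7]), torch.tensor([0, 5]), torch.tensor([42, 9])
print(scorer(h, r, t))                   # one score per triple in the batch
```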
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, submitted to and accepted in Automated Knowledge Base Construction (AKBC) Workshop 2017, at NIPS 2017</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1710.09942">arXiv:1710.09942</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1710.09942">pdf</a>, <a href="https://arxiv.org/format/1710.09942">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CANDiS: Coupled &amp; Attention-Driven Neural Distant Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nagarajan%2C+T">Tushar Nagarajan</a>, <a href="/search/cs?searchtype=author&amp;query=Sharmistha"> Sharmistha</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1710.09942v1-abstract-short" style="display: inline;"> Distant Supervision for Relation Extraction uses heuristically aligned text data with an existing knowledge base as training data. The unsupervised nature of this technique allows it to scale to web-scale relation extraction tasks, at the expense of noise in the training data. Previous work has explored relationships among instances of the same entity-pair to reduce this noise, but relationships a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1710.09942v1-abstract-full').style.display = 'inline'; document.getElementById('1710.09942v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1710.09942v1-abstract-full" style="display: none;"> Distant Supervision for Relation Extraction uses heuristically aligned text data with an existing knowledge base as training data. The unsupervised nature of this technique allows it to scale to web-scale relation extraction tasks, at the expense of noise in the training data. Previous work has explored relationships among instances of the same entity-pair to reduce this noise, but relationships among instances across entity-pairs have not been fully exploited. We explore the use of inter-instance couplings based on verb-phrase and entity type similarities. We propose a novel technique, CANDiS, which casts distant supervision using inter-instance coupling into an end-to-end neural network model. CANDiS incorporates an attention module at the instance-level to model the multi-instance nature of this problem. CANDiS outperforms existing state-of-the-art techniques on a standard benchmark dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1710.09942v1-abstract-full').style.display = 'none'; document.getElementById('1710.09942v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2017. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">WiNLP 2017</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1707.01917">arXiv:1707.01917</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1707.01917">pdf</a>, <a href="https://arxiv.org/format/1707.01917">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Higher-order Relation Schema Induction using Tensor Factorization with Back-off and Aggregation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nimishakavi%2C+M">Madhav Nimishakavi</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1707.01917v2-abstract-short" style="display: inline;"> Relation Schema Induction (RSI) is the problem of identifying type signatures of arguments of relations from unlabeled text. Most of the previous work in this area have focused only on binary RSI, i.e., inducing only the subject and object type signatures per relation. However, in practice, many relations are high-order, i.e., they have more than two arguments and inducing type signatures of all a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.01917v2-abstract-full').style.display = 'inline'; document.getElementById('1707.01917v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1707.01917v2-abstract-full" style="display: none;"> Relation Schema Induction (RSI) is the problem of identifying type signatures of arguments of relations from unlabeled text. Most of the previous work in this area have focused only on binary RSI, i.e., inducing only the subject and object type signatures per relation. However, in practice, many relations are high-order, i.e., they have more than two arguments and inducing type signatures of all arguments is necessary. For example, in the sports domain, inducing a schema win(WinningPlayer, OpponentPlayer, Tournament, Location) is more informative than inducing just win(WinningPlayer, OpponentPlayer). We refer to this problem as Higher-order Relation Schema Induction (HRSI). In this paper, we propose Tensor Factorization with Back-off and Aggregation (TFBA), a novel framework for the HRSI problem. To the best of our knowledge, this is the first attempt at inducing higher-order relation schemata from unlabeled text. Using the experimental analysis on three real world datasets, we show how TFBA helps in dealing with sparsity and induce higher order schemata. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.01917v2-abstract-full').style.display = 'none'; document.getElementById('1707.01917v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 July, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1610.06912">arXiv:1610.06912</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1610.06912">pdf</a>, <a href="https://arxiv.org/format/1610.06912">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KGEval: Estimating Accuracy of Automatically Constructed Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ojha%2C+P">Prakhar Ojha</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1610.06912v2-abstract-short" style="display: inline;"> Automatic construction of large knowledge graphs (KG) by mining web-scale text datasets has received considerable attention recently. Estimating accuracy of such automatically constructed KGs is a challenging problem due to their size and diversity. This important problem has largely been ignored in prior research we fill this gap and propose KGEval. KGEval binds facts of a KG using coupling const&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1610.06912v2-abstract-full').style.display = 'inline'; document.getElementById('1610.06912v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1610.06912v2-abstract-full" style="display: none;"> Automatic construction of large knowledge graphs (KG) by mining web-scale text datasets has received considerable attention recently. Estimating accuracy of such automatically constructed KGs is a challenging problem due to their size and diversity. This important problem has largely been ignored in prior research we fill this gap and propose KGEval. KGEval binds facts of a KG using coupling constraints and crowdsources the facts that infer correctness of large parts of the KG. We demonstrate that the objective optimized by KGEval is submodular and NP-hard, allowing guarantees for our approximation algorithm. Through extensive experiments on real-world datasets, we demonstrate that KGEval is able to estimate KG accuracy more accurately compared to other competitive baselines, while requiring significantly lesser number of human evaluations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1610.06912v2-abstract-full').style.display = 'none'; document.getElementById('1610.06912v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2016. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Talukdar%2C+P&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility 
Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10