
The Stanford Natural Language Processing Group

"styles-cleanblue.css") --> <link rel="stylesheet" href="/static/css/styles-cleanred.css" id="grove-styles"> <link rel="stylesheet" href="//cdn.datatables.net/1.10.7/css/jquery.dataTables.min.css" type="text/css"> <!-- table highlighting --> <!--<link rel="stylesheet" href="//cdn.datatables.net/plug-ins/1.10.7/features/searchHighlight/dataTables.searchHighlight.css">--> <!-- site-wide custom css --> <link rel="stylesheet" href="/static/css/nlp.css" id="nlp-styles"> <!--[if lt IE 9]> <link rel="stylesheet" href="css/ie8.css"> <script src="js/vendor/google/html5-3.6-respond-1.1.0.min.js"></script> <![endif]--> <script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script> <script>window.jQuery || document.write('<script src="static/js/vendor/jquery/jquery-1.9.1.min.js"><\/script>')</script> <!-- jQuery with jQuery Easing, and jQuery Transit JS --> <!-- LayerSlider from Kreatura Media with Transitions --> <script src="/static/js/vendor/layerslider/greensock.js" type="text/javascript"></script> <script src="/static/js/vendor/layerslider/layerslider.transitions.js" type="text/javascript"></script> <script src="/static/js/vendor/layerslider/layerslider.kreaturamedia.jquery.js" type="text/javascript"></script> <!-- Bootstrap Markdown for Blog editing--> <script src="/static/js/markdown.js" type="text/javascript"></script> <script src="/static/js/to-markdown.js" type="text/javascript"></script> <script src="/static/js/bootstrap-markdown.js" type="text/javascript"></script> <!-- Grove Layerslider initiation script --> <script src="/static/js/grove-slider.js" type="text/javascript"></script> <!-- DataTables.net for publications --> <script src="//cdn.datatables.net/1.10.7/js/jquery.dataTables.min.js"></script> <!-- highlighting for tables --> <script src="//cdn.datatables.net/plug-ins/1.10.7/features/searchHighlight/dataTables.searchHighlight.min.js"></script> <script src="//bartaz.github.io/sandbox.js/jquery.highlight.js"></script> </head> <body> <!-- navigation --> <header> <nav class="navbar navbar-default grove-navbar" role="navigation"> <div class="container"> <div class="navbar-header"> <a href="#" class="grove-toggle collapsed" data-toggle="collapse" data-target=".grove-nav"> <i class="glyphicons show_lines"></i> </a> <img class="navbar-brand navbar-left hidden-xs" src="/static/img/logos/Stanford-NLP-icon-144x72.gif" width="144" height="72"> <a class="navbar-brand navbar-left" href="/"><h3 class="hidden-xs hidden-sm">The Stanford Natural Language Processing Group</h3><h3 class="hidden-md hidden-lg hidden-xl">The Stanford NLP Group</h3></a> </div> <div class="navbar-collapse grove-nav collapse"> <ul class="nav navbar-nav"> <li> <a href="/people/">people</a> </li> <li> <a href="/pubs/">publications</a> </li> <li> <a href="/blog/">research blog</a> </li> <li> <a href="/software/">software</a> </li> <li> <a href="/teaching/">teaching</a> </li> <li> <a href="/prospective/">join</a> </li> <li> <a href="/new_local/">local</a> </li> </ul> </div><!-- /.navbar-collapse --> </div> </nav> </header> <div class="widewrapper"> <!--<div class="container" id="maincontent">--> <div id="body_content"> <div class="widewrapper pagetitle"> <div class="container"> </div> </div> <div class="widewrapper weak-highlight"> <div class="container content"> <center> <h1>The Stanford Natural Language Inference (SNLI) Corpus</h1> </center> <p> Natural Language Inference (NLI), also known as Recognizing Textual Entailment (RTE), is the task of determining the inference relation between two (short, ordered) 

Natural Language Inference (NLI), also known as Recognizing Textual Entailment (RTE), is the task of determining the inference relation between two (short, ordered) texts: entailment, contradiction, or neutral (MacCartney and Manning 2008, https://www.aclweb.org/anthology/C08-1066).

The Corpus

The Stanford Natural Language Inference (SNLI) corpus (version 1.0) is a collection of 570k human-written English sentence pairs manually labeled for balanced classification with the labels entailment, contradiction, and neutral. We aim for it to serve both as a benchmark for evaluating representational systems for text, especially including those induced by representation-learning methods, and as a resource for developing NLP models of any kind.

The following paper introduces the corpus in detail. If you use the corpus in published work, please cite it:

    Samuel R. Bowman, Gabor Angeli, Christopher Potts, and Christopher D. Manning. 2015. A large annotated corpus for learning natural language inference. In Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP). [pdf: http://nlp.stanford.edu/pubs/snli_paper.pdf] [bib: http://nlp.stanford.edu/pubs/snli_paper.bib]

Here are a few example pairs taken from the development portion of the corpus. Each has the judgments of five Mechanical Turk workers and a consensus judgment.

Text | Judgments | Hypothesis
A man inspects the uniform of a figure in some East Asian country. | contradiction (C C C C C) | The man is sleeping
An older and younger man smiling. | neutral (N N E N N) | Two men are smiling and laughing at the cats playing on the floor.
A black race car starts up in front of a crowd of people. | contradiction (C C C C C) | A man is driving down a lonely road.
A soccer game with multiple males playing. | entailment (E E E E E) | Some men are playing a sport.
A smiling costumed woman is holding an umbrella. | neutral (N N E C N) | A happy woman in a fairy costume holds an umbrella.

The corpus is distributed in both JSON Lines and tab-separated value files, which are packaged together (with a readme) here:

    Download: SNLI 1.0 (snli_1.0.zip, ~100MB)

SNLI is archived at the NYU Faculty Digital Archive (http://hdl.handle.net/2451/41728).
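
Reading the distribution is straightforward. Below is a minimal sketch (assuming the snli_1.0/ directory unpacked from the zip above) of loading the JSON Lines files; the field names sentence1, sentence2, gold_label, and annotator_labels follow the 1.0 release, and the tab-separated .txt files carry the same fields.

```python
# Minimal sketch: read SNLI 1.0 JSON Lines files and inspect the label fields.
# Paths assume snli_1.0.zip above has been unpacked in the working directory.
import json
from collections import Counter

def read_snli(path):
    """Yield one example dict per line of an SNLI .jsonl file."""
    with open(path, encoding="utf-8") as f:
        for line in f:
            yield json.loads(line)

dev = list(read_snli("snli_1.0/snli_1.0_dev.jsonl"))
example = dev[0]
premise = example["sentence1"]       # the "text" column in the table above
hypothesis = example["sentence2"]    # the "hypothesis" column
gold = example["gold_label"]         # consensus label, or "-" if none

# For dev and test items, annotator_labels holds the five individual judgments;
# the consensus (gold) label is the one chosen by at least three annotators.
label, count = Counter(example["annotator_labels"]).most_common(1)[0]
consensus = label if count >= 3 else "-"
```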

The Stanford Natural Language Inference Corpus by The Stanford NLP Group (http://nlp.stanford.edu/snli/) is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License (http://creativecommons.org/licenses/by-sa/4.0/). Based on a work at http://shannon.cs.illinois.edu/DenotationGraph/.

The corpus includes content from the Flickr 30k corpus (http://shannon.cs.illinois.edu/DenotationGraph/), also released under an Attribution-ShareAlike license, which can be cited by way of this paper:

    Peter Young, Alice Lai, Micah Hodosh, and Julia Hockenmaier. 2014. From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Transactions of the Association for Computational Linguistics 2:67-78. https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/229

About 4k sentences in the training set have captionIDs and pairIDs beginning with 'vg_'. These come from a pilot data collection effort that used data from the VisualGenome corpus, which was still under construction at the time of the release of SNLI. For more information on VisualGenome, see https://visualgenome.org/.

The hard subset of the test set used in Gururangan et al. 2018 is available in JSONL format here: snli_1.0_test_hard.jsonl.

Dataset Card

For key information for those considering building applications on this data, see the dataset card created by the Hugging Face Datasets team: https://github.com/huggingface/datasets/tree/master/datasets/snli.

Published results

The following table reflects our informal attempt to catalog published 3-class classification results on the SNLI test set. We define sentence vector-based models as those which perform classification on the sole basis of a pair of fixed-size sentence representations that are computed independently of one another. Reported parameter counts do not include word embeddings. If you would like to add a paper that reports a number at or above the current state of the art, email Sam (bowman@nyu.edu).
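
To make the sentence vector-based category concrete, here is a minimal sketch of that setup: the premise and the hypothesis are each encoded into a fixed-size vector without seeing the other sentence, and only the two vectors reach the classifier. The BiLSTM-with-max-pooling encoder and the [u; v; |u - v|; u * v] feature combination are common illustrative choices, not a particular model from the table.

```python
# Sketch of a sentence vector-based NLI model (PyTorch). Premise and
# hypothesis are encoded independently; only the two fixed-size vectors
# are used for 3-way classification (entailment / neutral / contradiction).
import torch
import torch.nn as nn

class SentenceEncoder(nn.Module):
    """BiLSTM over word embeddings, max-pooled into a fixed-size vector."""
    def __init__(self, vocab_size, embed_dim=300, hidden_dim=300):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True,
                            bidirectional=True)

    def forward(self, token_ids):            # token_ids: (batch, seq_len)
        states, _ = self.lstm(self.embed(token_ids))
        return states.max(dim=1).values      # (batch, 2 * hidden_dim)

class SentenceVectorNLI(nn.Module):
    """Classifies a pair of independently computed sentence vectors."""
    def __init__(self, vocab_size, hidden_dim=300, num_classes=3):
        super().__init__()
        self.encoder = SentenceEncoder(vocab_size, hidden_dim=hidden_dim)
        feature_dim = 4 * (2 * hidden_dim)   # [u; v; |u - v|; u * v]
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, 512), nn.ReLU(),
            nn.Linear(512, num_classes))

    def forward(self, premise_ids, hypothesis_ids):
        u = self.encoder(premise_ids)        # computed without the hypothesis
        v = self.encoder(hypothesis_ids)     # computed without the premise
        features = torch.cat([u, v, (u - v).abs(), u * v], dim=-1)
        return self.classifier(features)     # 3-class logits
```

Models in the "Other neural network models" section below drop this restriction and attend between premise and hypothesis tokens before classifying.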

Three-way classification

Publication | Model | Parameters | Train (% acc) | Test (% acc)

Feature-based models
Bowman et al. '15 | Unlexicalized features | | 49.4 | 50.4
Bowman et al. '15 | + Unigram and bigram features | | 99.7 | 78.2

Sentence vector-based models
Bowman et al. '15 | 100D LSTM encoders | 220k | 84.8 | 77.6
Bowman et al. '16 | 300D LSTM encoders | 3.0m | 83.9 | 80.6
Vendrov et al. '15 | 1024D GRU encoders w/ unsupervised 'skip-thoughts' pre-training | 15m | 98.8 | 81.4
Mou et al. '15 | 300D Tree-based CNN encoders | 3.5m | 83.3 | 82.1
Bowman et al. '16 | 300D SPINN-PI encoders | 3.7m | 89.2 | 83.2
Yang Liu et al. '16 | 600D (300+300) BiLSTM encoders | 2.0m | 86.4 | 83.3
Munkhdalai & Yu '16b | 300D NTI-SLSTM-LSTM encoders | 4.0m | 82.5 | 83.4
Yang Liu et al. '16 | 600D (300+300) BiLSTM encoders with intra-attention | 2.8m | 84.5 | 84.2
Conneau et al. '17 | 4096D BiLSTM with max-pooling | 40m | 85.6 | 84.5
Munkhdalai & Yu '16a | 300D NSE encoders | 3.0m | 86.2 | 84.6
Qian Chen et al. '17 | 600D (300+300) Deep Gated Attn. BiLSTM encoders (code: https://github.com/lukecq1231/enc_nli/) | 12m | 90.5 | 85.5
Tao Shen et al. '17 | 300D Directional self-attention network encoders (code: https://github.com/taoshen58/DiSAN) | 2.4m | 91.1 | 85.6
Jihun Choi et al. '17 | 300D Gumbel TreeLSTM encoders | 2.9m | 91.2 | 85.6
Nie and Bansal '17 | 300D Residual stacked encoders | 9.7m | 89.8 | 85.7
Anonymous '18 | 1200D REGMAPR (Base+Reg) | – | – | 85.9
Yi Tay et al. '18 | 300D CAFE (no cross-sentence attention) | 3.7m | 87.3 | 85.9
Jihun Choi et al. '17 | 600D Gumbel TreeLSTM encoders | 10m | 93.1 | 86.0
Nie and Bansal '17 | 600D Residual stacked encoders | 29m | 91.0 | 86.0
Tao Shen et al. '18 | 300D Reinforced Self-Attention Network | 3.1m | 92.6 | 86.3
Im and Cho '17 | Distance-based Self-Attention Network | 4.7m | 89.6 | 86.3
Seonhoon Kim et al. '18 | Densely-Connected Recurrent and Co-Attentive Network (encoder) | 5.6m | 91.4 | 86.5
Talman et al. '18 | 600D Hierarchical BiLSTM with Max Pooling (HBMP, code: https://github.com/Helsinki-NLP/HBMP) | 22m | 89.9 | 86.6
Qian Chen et al. '18 | 600D BiLSTM with generalized pooling | 65m | 94.9 | 86.6
Kiela et al. '18 | 512D Dynamic Meta-Embeddings | 9m | 91.6 | 86.7
Deunsol Yoon et al. '18 | 600D Dynamic Self-Attention Model | 2.1m | 87.3 | 86.8
Deunsol Yoon et al. '18 | 2400D Multiple-Dynamic Self-Attention Model | 7.0m | 89.0 | 87.4

Other neural network models (usually with attention between text and hypothesis words)
Rocktäschel et al. '15 | 100D LSTMs w/ word-by-word attention | 250k | 85.3 | 83.5
Pengfei Liu et al. '16a | 100D DF-LSTM | 320k | 85.2 | 84.6
Yang Liu et al. '16 | 600D (300+300) BiLSTM encoders with intra-attention and symbolic preproc. | 2.8m | 85.9 | 85.0
Pengfei Liu et al. '16b | 50D stacked TC-LSTMs | 190k | 86.7 | 85.1
Munkhdalai & Yu '16a | 300D MMA-NSE encoders with attention | 3.2m | 86.9 | 85.4
Wang & Jiang '15 | 300D mLSTM word-by-word attention model | 1.9m | 92.0 | 86.1
Jianpeng Cheng et al. '16 | 300D LSTMN with deep attention fusion | 1.7m | 87.3 | 85.7
Jianpeng Cheng et al. '16 | 450D LSTMN with deep attention fusion | 3.4m | 88.5 | 86.3
Parikh et al. '16 | 200D decomposable attention model | 380k | 89.5 | 86.3
Parikh et al. '16 | 200D decomposable attention model with intra-sentence attention | 580k | 90.5 | 86.8
Munkhdalai & Yu '16b | 300D Full tree matching NTI-SLSTM-LSTM w/ global attention | 3.2m | 88.5 | 87.3
Zhiguo Wang et al. '17 | BiMPM | 1.6m | 90.9 | 87.5
Lei Sha et al. '16 | 300D re-read LSTM | 2.0m | 90.7 | 87.5
Yichen Gong et al. '17 | 448D Densely Interactive Inference Network (DIIN, code: https://github.com/YichenGong/Densely-Interactive-Inference-Network) | 4.4m | 91.2 | 88.0
McCann et al. '17 | Biattentive Classification Network + CoVe + Char | 22m | 88.5 | 88.1
Chuanqi Tan et al. '18 | 150D Multiway Attention Network | 14m | 94.5 | 88.3
Xiaodong Liu et al. '18 | Stochastic Answer Network | 3.5m | 93.3 | 88.5
Ghaeini et al. '18 | 450D DR-BiLSTM | 7.5m | 94.1 | 88.5
Yi Tay et al. '18 | 300D CAFE | 4.7m | 89.8 | 88.5
Qian Chen et al. '17 | KIM | 4.3m | 94.1 | 88.6
Qian Chen et al. '16 | 600D ESIM + 300D Syntactic TreeLSTM (code: https://github.com/lukecq1231/nli) | 7.7m | 93.5 | 88.6
Peters et al. '18 | ESIM + ELMo | 8.0m | 91.6 | 88.7
Boyuan Pan et al. '18 | 300D DMAN | 9.2m | 95.4 | 88.8
Zhiguo Wang et al. '17 | BiMPM Ensemble | 6.4m | 93.2 | 88.8
Yichen Gong et al. '17 | 448D Densely Interactive Inference Network (DIIN) Ensemble | 17m | 92.3 | 88.9
Seonhoon Kim et al. '18 | Densely-Connected Recurrent and Co-Attentive Network | 6.7m | 93.1 | 88.9
Qian Chen et al. '17 | KIM Ensemble | 43m | 93.6 | 89.1
Ghaeini et al. '18 | 450D DR-BiLSTM Ensemble | 45m | 94.8 | 89.3
Peters et al. '18 | ESIM + ELMo Ensemble | 40m | 92.1 | 89.3
Yi Tay et al. '18 | 300D CAFE Ensemble | 17.5m | 92.5 | 89.3
Chuanqi Tan et al. '18 | 150D Multiway Attention Network Ensemble | 58m | 95.5 | 89.4
Boyuan Pan et al. '18 | 300D DMAN Ensemble | 79m | 96.1 | 89.6
Radford et al. '18 | Fine-Tuned LM-Pretrained Transformer | 85m | 96.6 | 89.9
Seonhoon Kim et al. '18 | Densely-Connected Recurrent and Co-Attentive Network Ensemble | 53.3m | 95.0 | 90.1
Zhuosheng Zhang et al. '19a | SJRC (BERT-Large + SRL) | 308m | 95.7 | 91.3
Xiaodong Liu et al. '19 | MT-DNN | 330m | 97.2 | 91.6
Zhuosheng Zhang et al. '19b | SemBERT | 339m | 94.4 | 91.9
Pilault et al. '20 | CA-MTL | 340m | 92.6 | 92.1
Sun et al. '20 | RoBERTa-large + self-explaining layer | 355m+ | ? | 92.3
Wang et al. '21 | EFL (Entailment as Few-shot Learner) + RoBERTa-large | 355m | ? | 93.1
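
The test accuracies above are standard 3-class accuracies on the SNLI test set. As a reference point, here is a minimal sketch of that evaluation; pairs whose gold_label is "-" (no annotator consensus) are conventionally excluded, and predict is a stand-in for whichever model is being scored.

```python
# Sketch: 3-class accuracy on the SNLI test set, skipping pairs with no
# consensus gold label ("-"). `predict` is any (premise, hypothesis) -> label
# function returning "entailment", "neutral", or "contradiction".
def snli_accuracy(examples, predict):
    correct = total = 0
    for ex in examples:
        if ex["gold_label"] == "-":      # no consensus: excluded by convention
            continue
        total += 1
        if predict(ex["sentence1"], ex["sentence2"]) == ex["gold_label"]:
            correct += 1
    return 100.0 * correct / total
```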

Related Resources

- A spell-checked version of the test and development sets (https://www.philips.com/c-dam/corporate/research/research-programs/Data.zip). (Warning: Results on these sets are not directly comparable to results on the regular dev and test sets, and will not be listed here.)
- The Multi-Genre NLI (MultiNLI or MNLI) Corpus (https://cims.nyu.edu/~sbowman/multinli/). The corpus is in the same format as SNLI and is comparable in size, but it includes a more diverse range of text styles and topics, as well as an auxiliary test set for cross-genre transfer evaluation.
- The FraCaS test suite for natural language inference, in XML format (http://www-nlp.stanford.edu/~wcmac/downloads/)
- MedNLI: A Natural Language Inference Dataset for the Clinical Domain (https://physionet.org/physiotools/mimic-code/mednli/)
- XNLI: A Cross-Lingual Natural Language Inference Evaluation Set (https://www.nyu.edu/projects/bowman/xnli/)
- e-SNLI: Explanation annotations over SNLI (https://github.com/OanaMariaCamburu/e-SNLI)

Contact Information

For any comments or questions, please email Sam (bowman@nyu.edu), Gabor (angeli@cs.stanford.edu), and Chris (manning@stanford.edu).
