<!-- Scrape artifact preserved from snapshot header (was bare text before the doctype,
     which would break standards-mode parsing):
     CINXE.COM
     When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories - ACL Anthology -->
<!doctype html><html lang=en-us><head><meta charset=utf-8><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1,shrink-to-fit=no"><!--[if IEMobile]><meta http-equiv=cleartype content="on"><![endif]--><title>When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories - ACL Anthology</title> <meta name=generator content="Hugo 0.140.2"><link href=/aclicon.ico rel="shortcut icon" type=image/x-icon><link rel=stylesheet href=/css/main.min.b53e37419139f6302a0be966257012b1d281f3350967383f227f76c0ecaeff80.css media=screen><link rel=stylesheet href=https://use.fontawesome.com/releases/v5.7.2/css/all.css integrity=sha384-fnmOCqbTlWIlj8LyTjo7mOUStjsKC4pOpQbqyi7RrhN7udi9RwhKkMHpvLbHG9Sr crossorigin=anonymous><link rel=stylesheet href=/css/academicons.min.css><meta content="When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories" name=citation_title><meta content="Alex Mallen" name=citation_author><meta content="Akari Asai" name=citation_author><meta content="Victor Zhong" name=citation_author><meta content="Rajarshi Das" name=citation_author><meta content="Daniel Khashabi" name=citation_author><meta content="Hannaneh Hajishirzi" name=citation_author><meta content="Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)" name=citation_conference_title><meta content="2023/7" name=citation_publication_date><meta content="https://aclanthology.org/2023.acl-long.546.pdf" name=citation_pdf_url><meta content="9802" name=citation_firstpage><meta content="9822" name=citation_lastpage><meta content="10.18653/v1/2023.acl-long.546" name=citation_doi><meta property="og:title" content="When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories"><meta property="og:image" content="https://aclanthology.org/thumb/2023.acl-long.546.jpg"><meta property="og:image:alt" 
content="First page of paper PDF."><meta property="og:type" content="article"><meta property="og:site_name" content="ACL Anthology"><meta property="og:url" content="https://aclanthology.org/2023.acl-long.546/"><meta property="og:description" content="Alex Mallen, Akari Asai, Victor Zhong, Rajarshi Das, Daniel Khashabi, Hannaneh Hajishirzi. Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). 2023."><link rel=canonical href=https://aclanthology.org/2023.acl-long.546/></head><body><nav class="navbar navbar-expand-sm navbar-light bg-light bg-gradient-light shadow-sm py-0 mb-3 mb-md-4 mb-xl-5"><div id=navbar-container class=container><a class=navbar-brand href=https://aclanthology.org/><img src=https://aclanthology.org/images/acl-logo.svg width=56 alt="ACL Logo"> <span class="d-inline pl-2">ACL Anthology</span> </a><button class=navbar-toggler type=button data-toggle=collapse data-target=#navbarSupportedContent aria-controls=navbarSupportedContent aria-expanded=false aria-label="Toggle navigation"> <span class=navbar-toggler-icon></span></button><div class="collapse navbar-collapse" id=navbarSupportedContent><ul class="navbar-nav flex-grow-1 pr-md-2"><li class=nav-item><a class=nav-link href=/posts/>News<span class=sr-only>(current)</span></a></li><li class=nav-item><a class=nav-link href=/faq/>FAQ<span class=sr-only>(current)</span></a></li><li class=nav-item><a class=nav-link href=/info/corrections/>Corrections<span class=sr-only>(current)</span></a></li><li class=nav-item><a class=nav-link href=/info/contrib/>Submissions<span class=sr-only>(current)</span></a></li><li class=nav-item><a class=nav-link href=https://github.com/acl-org/acl-anthology/><i class="fab fa-github pr-1"></i>Github</a></li></ul><form class="form-inline my-2 my-lg-0 flex-nowrap" action=/search/? method=get><input id=acl-search-box class="form-control mr-sm-2" name=q type=search placeholder=Search... 
aria-label=Search> <button class="btn btn-outline-primary" type=submit><i class="fas fa-search"></i></button></form></div></div></nav><div id=main-container class=container><section id=main><div><h2 id=title><a href=https://aclanthology.org/2023.acl-long.546.pdf>When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories</a></h2><p class=lead><a href=/people/a/alex-mallen/>Alex Mallen</a>, <a href=/people/a/akari-asai/>Akari Asai</a>, <a href=/people/v/victor-zhong/>Victor Zhong</a>, <a href=/people/r/rajarshi-das/>Rajarshi Das</a>, <a href=/people/d/daniel-khashabi/>Daniel Khashabi</a>, <a href=/people/h/hannaneh-hajishirzi/>Hannaneh Hajishirzi</a></p></div><div class="modal fade" id=metadataModal tabindex=-1 aria-labelledby=metadataModalLabel aria-hidden=true><div class="modal-dialog modal-lg"><div class=modal-content><div class=modal-header><h5 class=modal-title>Correct Metadata for <span id=paperIdSpan></span></h5><button class=close data-dismiss=modal aria-label=Close> <span aria-hidden=true>&#215;</span></button></div><div class=modal-body><form id=metadataForm><div class="alert alert-warning" role=alert><b>Important</b>: The Anthology treats PDFs as authoritative. Please use this form only to correct data that is out of line with the PDF. See <a href=https://aclanthology.org/info/corrections/>our corrections guidelines</a> if you need to change the PDF.</div><div class=mb-3><label for=paperTitle class=form-label>Title</label> <small id=paperTitleHelp class="form-text text-muted">Adjust the title. 
Retain tags such as &lt;fixed-case>.</small> <input type=text class=form-control id=paperTitle></div><label class=form-label>Authors</label> <small id=authorTitleHelp class="form-text text-muted">Adjust author names and order to match the PDF.</small><div id=authorsContainer class=px-3 ondrop=dropAuthor(event) ondragover=allowDrop(event)></div><button type=button class="btn btn-secondary btn-sm mb-3" onclick=addAuthor()>Add Author</button><div class=mb-3><label for=paperAbstract class=form-label>Abstract</label> <small id=abstractTitleHelp class="form-text text-muted">Correct abstract if needed. Retain XML formatting tags such as &lt;tex-math>.</small> <textarea class=form-control id=paperAbstract rows=6></textarea></div></form></div><div class="modal-footer d-flex align-items-center"><div class="form-check mb-0"><input type=checkbox class=form-check-input id=pdfCorrectionCheck> <label class=form-check-label for=pdfCorrectionCheck>ALL author names, the title, and the abstract match the PDF. If paper metadata matches the PDF, but the paper should be linked to a different author page, please file an <a href="https://github.com/acl-org/acl-anthology/issues/new?assignees=anthology-assist&labels=correction%2Cmetadata&projects=&template=02-name-correction.yml&title=Author+Page%3A+%7Breplace+with+author+name%7D">author page correction</a> instead.</label></div><button type=button class="btn btn-primary" onclick=submitMetadataCorrection()>Submit</button></div></div></div></div><hr><div class="row acl-paper-details"><div class="col col-lg-10 order-2"><div class="card bg-light mb-2 mb-lg-3"><div class="card-body acl-abstract"><h5 class=card-title>Abstract</h5><span>Despite their impressive performance on diverse tasks, large language models (LMs) still struggle with tasks requiring rich world knowledge, implying the difficulty of encoding a wealth of world knowledge in their parameters. 
This paper aims to understand LMs’ strengths and limitations in memorizing factual knowledge, by conducting large-scale knowledge probing experiments on two open-domain entity-centric QA datasets: PopQA, our new dataset with 14k questions about long-tail entities, and EntityQuestions, a widely used open-domain QA dataset. We find that LMs struggle with less popular factual knowledge, and that retrieval augmentation helps significantly in these cases. Scaling, on the other hand, mainly improves memorization of popular knowledge, and fails to appreciably improve memorization of factual knowledge in the tail. Based on those findings, we devise a new method for retrieval-augmentation that improves performance and reduces inference costs by only retrieving non-parametric memories when necessary.</span></div></div><dl><dt>Anthology ID:</dt><dd>2023.acl-long.546</dd><dt>Volume:</dt><dd><a href=/volumes/2023.acl-long/>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</a></dd><dt>Month:</dt><dd>July</dd><dt>Year:</dt><dd>2023</dd><dt>Address:</dt><dd>Toronto, Canada</dd><dt>Editors:</dt><dd><a href=/people/a/anna-rogers/>Anna Rogers</a>, <a href=/people/j/jordan-boyd-graber/>Jordan Boyd-Graber</a>, <a href=/people/n/naoaki-okazaki/>Naoaki Okazaki</a></dd><dt>Venue:</dt><dd><a href=/venues/acl/>ACL</a></dd><dt>SIG:</dt><dd></dd><dt>Publisher:</dt><dd>Association for Computational Linguistics</dd><dt>Note:</dt><dd></dd><dt>Pages:</dt><dd>9802–9822</dd><dt>Language:</dt><dd></dd><dt>URL:</dt><dd><a href=https://aclanthology.org/2023.acl-long.546/>https://aclanthology.org/2023.acl-long.546/</a></dd><dt>DOI:</dt><dd><a href=https://doi.org/10.18653/v1/2023.acl-long.546 title="To the current version of the paper by DOI">10.18653/v1/2023.acl-long.546</a></dd><dt class=acl-button-row>Bibkey:</dt><dd class=acl-button-row><button type=button class="btn btn-clipboard-outside btn-secondary btn-sm d-none" 
data-clipboard-target=#citePaperBibkey><i class="far fa-clipboard"></i><span id=citePaperBibkey class="pl-2 text-monospace">mallen-etal-2023-trust</span></button></dd><dt>Cite (ACL):</dt><dd><span id=citeACL>Alex Mallen, Akari Asai, Victor Zhong, Rajarshi Das, Daniel Khashabi, and Hannaneh Hajishirzi. 2023. <a href=https://aclanthology.org/2023.acl-long.546/>When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories</a>. In <i>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</i>, pages 9802–9822, Toronto, Canada. Association for Computational Linguistics.</span><button type=button class="btn btn-clipboard btn-secondary btn-sm d-none ml-2" data-clipboard-target=#citeACL><i class="far fa-clipboard"></i></button></dd><dt>Cite (Informal):</dt><dd><span id=citeRichText><a href=https://aclanthology.org/2023.acl-long.546/>When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories</a> (Mallen et al., ACL 2023)</span><button type=button class="btn btn-clipboard btn-secondary btn-sm d-none ml-2" data-clipboard-target=#citeRichText><i class="far fa-clipboard"></i></button></dd><dt class=acl-button-row>Copy Citation:</dt><dd class=acl-button-row><button type=button class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target=#citeBibtexContent><i class="far fa-clipboard pr-2"></i>BibTeX</button> <button type=button class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target=#citeMarkdownContent><i class="far fa-clipboard pr-2"></i>Markdown</button> <button type=button class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target=#citeModsContent><i class="far fa-clipboard pr-2"></i>MODS XML</button> <button type=button class="btn btn-clipboard-outside btn-secondary btn-sm d-none" data-clipboard-target=#citeEndnoteContent><i class="far fa-clipboard 
pr-2"></i>Endnote</button> <button type=button class="btn btn-secondary btn-sm" data-toggle=modal data-target=#citeModal>More options…</button></dd><dt>PDF:</dt><dd><a href=https://aclanthology.org/2023.acl-long.546.pdf>https://aclanthology.org/2023.acl-long.546.pdf</a></dd><dt class=acl-button-row>Video:</dt><dd class=acl-button-row><a href=https://aclanthology.org/2023.acl-long.546.mp4 class="btn btn-attachment btn-sm"><i class="fas fa-video"></i>&nbsp;https://aclanthology.org/2023.acl-long.546.mp4</a></dd></dl></div><div class=acl-paper-link-block><a class="btn btn-primary" href=https://aclanthology.org/2023.acl-long.546.pdf title="Open PDF of 'When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories'"><i class="far fa-file-pdf"></i><span class=pl-2>PDF</span> </a><a class="btn btn-secondary" title="Open dialog for exporting citations" data-toggle=modal data-target=#citeModal href=#><i class="fas fa-quote-left"></i><span class=pl-2>Cite</span> </a><a class="btn btn-secondary" href="https://www.semanticscholar.org/search?q=When+Not+to+Trust+Language+Models%3A+Investigating+Effectiveness+of+Parametric+and+Non-Parametric+Memories" title="Search for 'When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories' on Semantic Scholar"><i class="ai ai-semantic-scholar"></i><span class="pl-sm-2 d-none d-sm-inline">Search</span> </a><a class="btn btn-attachment d-flex flex-wrap justify-content-center" href=https://aclanthology.org/2023.acl-long.546.mp4 title="Open video for 'When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories'"><span class="align-self-center px-1"><i class="fas fa-video"></i></span> <span class=px-1>Video</span> </a><a class="btn btn-warning d-flex flex-wrap justify-content-center" href=# title="Correct problems with title, author list, and abstract" onclick=showMetadataDialog()><span class="d-none 
d-sm-inline"><i class="fas fa-edit"></i></span> <span class=pl-md-2>Fix data</span></a></div></div><hr><div class="modal fade" id=citeModal tabindex=-1 role=dialog aria-labelledby=citeModalLabel aria-hidden=true><div class="modal-dialog modal-lg" role=document><div class=modal-content><div class=modal-header><h5 class=modal-title id=citeModalLabel>Export citation</h5><button class=close data-dismiss=modal aria-label=Close> <span aria-hidden=true>&#215;</span></button></div><div class=modal-body><ul class="nav nav-tabs mb-2" id=citeFormats role=tablist><li class=nav-item><a class="nav-link active" data-toggle=list href=#citeBibtex role=tab aria-controls=citeBibtex aria-selected=true>BibTeX</a></li><li class=nav-item><a class=nav-link data-toggle=list href=#citeMods role=tab aria-controls=citeMods aria-selected=false>MODS XML</a></li><li class=nav-item><a class=nav-link data-toggle=list href=#citeEndnote role=tab aria-controls=citeEndnote aria-selected=false>Endnote</a></li><li class=nav-item><a class=nav-link data-toggle=list href=#citeMarkdown role=tab aria-controls=citeMarkdown aria-selected=false>Preformatted</a></li></ul><div class=tab-content id=citeFormatsContent><div class="tab-pane active" id=citeBibtex role=tabpanel><pre id=citeBibtexContent class="bg-light border p-2" style=max-height:50vh>@inproceedings{mallen-etal-2023-trust, title = &#34;When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories&#34;, author = &#34;Mallen, Alex and Asai, Akari and Zhong, Victor and Das, Rajarshi and Khashabi, Daniel and Hajishirzi, Hannaneh&#34;, editor = &#34;Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki&#34;, booktitle = &#34;Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)&#34;, month = jul, year = &#34;2023&#34;, address = &#34;Toronto, Canada&#34;, publisher = &#34;Association for Computational Linguistics&#34;, url = 
&#34;https://aclanthology.org/2023.acl-long.546/&#34;, doi = &#34;10.18653/v1/2023.acl-long.546&#34;, pages = &#34;9802--9822&#34;, abstract = &#34;Despite their impressive performance on diverse tasks, large language models (LMs) still struggle with tasks requiring rich world knowledge, implying the difficulty of encoding a wealth of world knowledge in their parameters. This paper aims to understand LMs&#39; strengths and limitations in memorizing factual knowledge, by conducting large-scale knowledge probing experiments on two open-domain entity-centric QA datasets: PopQA, our new dataset with 14k questions about long-tail entities, and EntityQuestions, a widely used open-domain QA dataset. We find that LMs struggle with less popular factual knowledge, and that retrieval augmentation helps significantly in these cases. Scaling, on the other hand, mainly improves memorization of popular knowledge, and fails to appreciably improve memorization of factual knowledge in the tail. Based on those findings, we devise a new method for retrieval-augmentation that improves performance and reduces inference costs by only retrieving non-parametric memories when necessary.&#34; }</pre><div class="modal-footer pb-1"><a class="btn btn-secondary btn-filesaver disabled" data-filesaver-target=#citeBibtexContent data-filesaver-name=2023.acl-long.546.bib><i class="fas fa-download pr-2"></i>Download as File</a> <button class="btn btn-clipboard btn-primary d-none" data-clipboard-target=#citeBibtexContent><i class="far fa-clipboard pr-2"></i>Copy to Clipboard</button></div></div><div class=tab-pane id=citeMods role=tabpanel><pre id=citeModsContent class="bg-light border p-2" style=max-height:50vh>&lt;?xml version=&#34;1.0&#34; encoding=&#34;UTF-8&#34;?&gt; &lt;modsCollection xmlns=&#34;http://www.loc.gov/mods/v3&#34;&gt; &lt;mods ID=&#34;mallen-etal-2023-trust&#34;&gt; &lt;titleInfo&gt; &lt;title&gt;When Not to Trust Language Models: Investigating Effectiveness of Parametric and 
Non-Parametric Memories&lt;/title&gt; &lt;/titleInfo&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Alex&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Mallen&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;author&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Akari&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Asai&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;author&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Victor&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Zhong&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;author&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Rajarshi&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Das&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;author&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Daniel&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Khashabi&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;author&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Hannaneh&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Hajishirzi&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;author&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;originInfo&gt; &lt;dateIssued&gt;2023-07&lt;/dateIssued&gt; &lt;/originInfo&gt; &lt;typeOfResource&gt;text&lt;/typeOfResource&gt; &lt;relatedItem type=&#34;host&#34;&gt; 
&lt;titleInfo&gt; &lt;title&gt;Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)&lt;/title&gt; &lt;/titleInfo&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Anna&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Rogers&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;editor&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Jordan&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Boyd-Graber&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;editor&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;name type=&#34;personal&#34;&gt; &lt;namePart type=&#34;given&#34;&gt;Naoaki&lt;/namePart&gt; &lt;namePart type=&#34;family&#34;&gt;Okazaki&lt;/namePart&gt; &lt;role&gt; &lt;roleTerm authority=&#34;marcrelator&#34; type=&#34;text&#34;&gt;editor&lt;/roleTerm&gt; &lt;/role&gt; &lt;/name&gt; &lt;originInfo&gt; &lt;publisher&gt;Association for Computational Linguistics&lt;/publisher&gt; &lt;place&gt; &lt;placeTerm type=&#34;text&#34;&gt;Toronto, Canada&lt;/placeTerm&gt; &lt;/place&gt; &lt;/originInfo&gt; &lt;genre authority=&#34;marcgt&#34;&gt;conference publication&lt;/genre&gt; &lt;/relatedItem&gt; &lt;abstract&gt;Despite their impressive performance on diverse tasks, large language models (LMs) still struggle with tasks requiring rich world knowledge, implying the difficulty of encoding a wealth of world knowledge in their parameters. This paper aims to understand LMs’ strengths and limitations in memorizing factual knowledge, by conducting large-scale knowledge probing experiments on two open-domain entity-centric QA datasets: PopQA, our new dataset with 14k questions about long-tail entities, and EntityQuestions, a widely used open-domain QA dataset. 
We find that LMs struggle with less popular factual knowledge, and that retrieval augmentation helps significantly in these cases. Scaling, on the other hand, mainly improves memorization of popular knowledge, and fails to appreciably improve memorization of factual knowledge in the tail. Based on those findings, we devise a new method for retrieval-augmentation that improves performance and reduces inference costs by only retrieving non-parametric memories when necessary.&lt;/abstract&gt; &lt;identifier type=&#34;citekey&#34;&gt;mallen-etal-2023-trust&lt;/identifier&gt; &lt;identifier type=&#34;doi&#34;&gt;10.18653/v1/2023.acl-long.546&lt;/identifier&gt; &lt;location&gt; &lt;url&gt;https://aclanthology.org/2023.acl-long.546/&lt;/url&gt; &lt;/location&gt; &lt;part&gt; &lt;date&gt;2023-07&lt;/date&gt; &lt;extent unit=&#34;page&#34;&gt; &lt;start&gt;9802&lt;/start&gt; &lt;end&gt;9822&lt;/end&gt; &lt;/extent&gt; &lt;/part&gt; &lt;/mods&gt; &lt;/modsCollection&gt; </pre><div class="modal-footer pb-1"><a class="btn btn-secondary btn-filesaver disabled" data-filesaver-target=#citeModsContent data-filesaver-name=2023.acl-long.546.xml><i class="fas fa-download pr-2"></i>Download as File</a> <button class="btn btn-clipboard btn-primary d-none" data-clipboard-target=#citeModsContent><i class="far fa-clipboard pr-2"></i>Copy to Clipboard</button></div></div><div class=tab-pane id=citeEndnote role=tabpanel><pre id=citeEndnoteContent class="bg-light border p-2" style=max-height:50vh>%0 Conference Proceedings %T When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories %A Mallen, Alex %A Asai, Akari %A Zhong, Victor %A Das, Rajarshi %A Khashabi, Daniel %A Hajishirzi, Hannaneh %Y Rogers, Anna %Y Boyd-Graber, Jordan %Y Okazaki, Naoaki %S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) %D 2023 %8 July %I Association for Computational Linguistics %C Toronto, Canada %F 
mallen-etal-2023-trust %X Despite their impressive performance on diverse tasks, large language models (LMs) still struggle with tasks requiring rich world knowledge, implying the difficulty of encoding a wealth of world knowledge in their parameters. This paper aims to understand LMs’ strengths and limitations in memorizing factual knowledge, by conducting large-scale knowledge probing experiments on two open-domain entity-centric QA datasets: PopQA, our new dataset with 14k questions about long-tail entities, and EntityQuestions, a widely used open-domain QA dataset. We find that LMs struggle with less popular factual knowledge, and that retrieval augmentation helps significantly in these cases. Scaling, on the other hand, mainly improves memorization of popular knowledge, and fails to appreciably improve memorization of factual knowledge in the tail. Based on those findings, we devise a new method for retrieval-augmentation that improves performance and reduces inference costs by only retrieving non-parametric memories when necessary. 
%R 10.18653/v1/2023.acl-long.546 %U https://aclanthology.org/2023.acl-long.546/ %U https://doi.org/10.18653/v1/2023.acl-long.546 %P 9802-9822</pre><div class="modal-footer pb-1"><a class="btn btn-secondary btn-filesaver disabled" data-filesaver-target=#citeEndnoteContent data-filesaver-name=2023.acl-long.546.endf><i class="fas fa-download pr-2"></i>Download as File</a> <button class="btn btn-clipboard btn-primary d-none" data-clipboard-target=#citeEndnoteContent><i class="far fa-clipboard pr-2"></i>Copy to Clipboard</button></div></div><div class=tab-pane id=citeMarkdown role=tabpanel><h5>Markdown (Informal)</h5><p id=citeMarkdownContent class="text-monospace small bg-light border p-2">[When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories](https://aclanthology.org/2023.acl-long.546/) (Mallen et al., ACL 2023)</p><ul class=mt-2><li><a href=https://aclanthology.org/2023.acl-long.546/>When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories</a> (Mallen et al., ACL 2023)</li></ul><h5>ACL</h5><ul class=mt-2><li id=citeACLstyleContent>Alex Mallen, Akari Asai, Victor Zhong, Rajarshi Das, Daniel Khashabi, and Hannaneh Hajishirzi. 2023. <a href=https://aclanthology.org/2023.acl-long.546/>When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories</a>. In <i>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</i>, pages 9802–9822, Toronto, Canada. 
Association for Computational Linguistics.</li></ul><div class="modal-footer pb-1"><button type=button class="btn btn-clipboard btn-primary d-none" data-clipboard-target=#citeMarkdownContent><i class="far fa-clipboard pr-2"></i>Copy Markdown to Clipboard</button> <button type=button class="btn btn-clipboard btn-primary d-none" data-clipboard-target=#citeACLstyleContent><i class="far fa-clipboard pr-2"></i>Copy ACL to Clipboard</button></div></div></div></div></div></div></div></section></div><footer class="bg-gradient-light py-2 py-xl-3 mt-3 mt-md-4 mt-xl-5"><div class=container><p class="text-muted small px-1"><span class="float-right mt-2 ml-2"><a rel=license href=http://creativecommons.org/licenses/by/4.0/><img alt="Creative Commons License" style=border-width:0 src=https://i.creativecommons.org/l/by/4.0/88x31.png></a></span> ACL materials are Copyright ©&nbsp;1963&ndash;2025 ACL; other materials are copyrighted by their respective copyright holders. Materials prior to 2016 here are licensed under the <a href=https://creativecommons.org/licenses/by-nc-sa/3.0/>Creative Commons Attribution-NonCommercial-ShareAlike 3.0 International License</a>. Permission is granted to make copies for the purposes of teaching and research. 
Materials published in or after 2016 are licensed on a <a href=https://creativecommons.org/licenses/by/4.0/>Creative Commons Attribution 4.0 International License</a>.</p><p class="text-muted small px-1">The ACL Anthology is managed and built by the <a href=/info/credits/>ACL Anthology team</a> of volunteers.</p><p class="text-muted small px-1"><i>Site last built on 23 March 2025 at 19:03 UTC with <a href=https://github.com/acl-org/acl-anthology/tree/9092c177e43c2ce65174fc4cace616e5a18c0b08>commit 9092c17</a>.</i></p></div></footer><script src=https://code.jquery.com/jquery-3.3.1.slim.min.js integrity=sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo crossorigin=anonymous></script><script src=https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.6/umd/popper.min.js integrity=sha384-wHAiFfRlMFy6i5SRaxvfOCifBUQy1xHdJ/yoi7FRNXMRBu5WHdZYu1hA6ZOblgut crossorigin=anonymous></script><script src=https://stackpath.bootstrapcdn.com/bootstrap/4.2.1/js/bootstrap.min.js integrity=sha384-B0UglyR+jN6CkvvICOB2joaf5I4l3gm9GU6Hc1og6Ls7i6U/mkkaduKaBhlAXv9k crossorigin=anonymous></script><script src=https://cdn.jsdelivr.net/npm/sortablejs@1.15.0/Sortable.min.js></script><script>$(function(){$('[data-toggle="tooltip"]').tooltip(),$("#toggle-all-abstracts")&&($("#toggle-all-abstracts").on("click",function(){var e=$("#toggle-all-abstracts");e.attr("disabled",!0),e.attr("data-toggle-state")=="hide"?($(".abstract-collapse").collapse("show"),e.attr("data-toggle-state","show")):($(".abstract-collapse").collapse("hide"),e.attr("data-toggle-state","hide")),e.attr("disabled",!1)}),$("#toggle-all-abstracts").attr("disabled",!1))})</script><script src=/js/clipboard.min.js></script><script src=/js/FileSaver.js></script><script>$(document).ready(function(){if(ClipboardJS.isSupported()){success_fn=function(e){var t=$(e.trigger);t.toggleClass("btn-success"),t.children("i").toggleClass("far fa-clipboard fas 
fa-clipboard-check"),e.clearSelection(),setTimeout(function(){t.toggleClass("btn-success"),t.children("i").toggleClass("far fa-clipboard fas fa-clipboard-check")},2e3)};var e,t=new ClipboardJS(".btn-clipboard");t.on("success",success_fn),$(".btn-clipboard").removeClass("d-none"),e=new ClipboardJS(".btn-clipboard-outside",{text:function(e){var t=e.getAttribute("data-clipboard-target");return $(t).text()}}),e.on("success",success_fn),$(".btn-clipboard-outside").removeClass("d-none")}}),$(document).ready(function(){$(".btn-filesaver")&&($(".btn-filesaver").on("click",function(){var e=$(this).attr("data-filesaver-target"),t=$(this).attr("data-filesaver-name"),n=new Blob([$(e).text()],{type:"text/plain;charset=utf-8"});saveAs(n,t)}),$(".btn-filesaver").removeClass("disabled"))});const paper_params={anthology_id:"2023.acl-long.546",title:"When Not to Trust Language Models: Investigating Effectiveness of Parametric and Non-Parametric Memories",authors:[{first:"Alex",last:"Mallen",id:"alex-mallen"},{first:"Akari",last:"Asai",id:"akari-asai"},{first:"Victor",last:"Zhong",id:"victor-zhong"},{first:"Rajarshi",last:"Das",id:"rajarshi-das"},{first:"Daniel",last:"Khashabi",id:"daniel-khashabi"},{first:"Hannaneh",last:"Hajishirzi",id:"hannaneh-hajishirzi"}],abstract:"Despite their impressive performance on diverse tasks, large language models (LMs) still struggle with tasks requiring rich world knowledge, implying the difficulty of encoding a wealth of world knowledge in their parameters. This paper aims to understand LMs’ strengths and limitations in memorizing factual knowledge, by conducting large-scale knowledge probing experiments on two open-domain entity-centric QA datasets: PopQA, our new dataset with 14k questions about long-tail entities, and EntityQuestions, a widely used open-domain QA dataset. We find that LMs struggle with less popular factual knowledge, and that retrieval augmentation helps significantly in these cases. 
Scaling, on the other hand, mainly improves memorization of popular knowledge, and fails to appreciably improve memorization of factual knowledge in the tail. Based on those findings, we devise a new method for retrieval-augmentation that improves performance and reduces inference costs by only retrieving non-parametric memories when necessary."};function showMetadataDialog(){document.getElementById("paperIdSpan").textContent=paper_params.anthology_id,document.getElementById("paperTitle").value=paper_params.title,document.getElementById("paperAbstract").value=paper_params.abstract;const e=document.getElementById("authorsContainer");e.innerHTML="",paper_params.authors.forEach((t)=>{e.appendChild(createAuthorRow(t.first,t.last,t.id))});const t=new bootstrap.Modal(document.getElementById("metadataModal"));t.show()}new Sortable(document.getElementById("authorsContainer"),{animation:150,ghostClass:"sortable-ghost"}),authorsContainer.addEventListener("dragstart",e=>{const t=e.target.closest(".author-row");t&&(draggedElement=t,e.dataTransfer.effectAllowed="move",e.dataTransfer.setData("text/plain","reordering"))}),authorsContainer.addEventListener("dragover",e=>{e.preventDefault(),e.dataTransfer.dropEffect="move"}),authorsContainer.addEventListener("drop",e=>{e.preventDefault();const t=e.target.closest(".author-row");t&&t!==draggedElement?authorsContainer.insertBefore(draggedElement,t):t||authorsContainer.appendChild(draggedElement),draggedElement=null});function createAuthorRow(e,t,n){const s=document.createElement("div");s.className="row g-0 g-lg-2 mb-2 author-row align-items-center",s.draggable=!0,s.ondragstart=dragAuthor;const c=document.createElement("div");c.className="col-auto pe-1";const o=document.createElement("span");o.className="drag-handle",o.textContent="⋮",o.style="padding: 0 2px",o.draggable=!0,c.appendChild(o);const l=document.createElement("div");l.className="col-10 col-lg-4";const i=document.createElement("input");i.type="text",i.placeholder="First 
name",i.className="form-control",i.value=e,l.appendChild(i),c.appendChild(l);const d=document.createElement("div");d.className="col-10 col-lg-4 mt-2 mt-lg-0";const a=document.createElement("input");a.type="text",a.placeholder="Last name",a.className="form-control",a.value=t,d.appendChild(a);const u=document.createElement("input");u.type="hidden",u.value=n,d.appendChild(u);const h=document.createElement("div");h.className="col-auto ms-lg-auto text-end";const r=document.createElement("button");return r.type="button",r.className="btn btn-sm btn-danger",r.textContent="X",r.onclick=()=>s.remove(),h.appendChild(r),s.appendChild(c),s.appendChild(l),s.appendChild(d),s.appendChild(h),s}function addAuthor(){const e=document.getElementById("authorsContainer");e.appendChild(createAuthorRow("","","",""))}let draggedAuthor=null;function dragAuthor(e){e.dataTransfer.setData("text/plain",""),draggedAuthor=e.currentTarget}function allowDrop(e){e.preventDefault()}function dropAuthor(e){if(e.preventDefault(),e.target.id==="authorsContainer"||e.target.parentNode.id==="authorsContainer"){const t=document.getElementById("authorsContainer");e.target.classList&&e.target.classList.contains("author-row")?t.insertBefore(draggedAuthor,e.target):e.target.parentNode.classList&&e.target.parentNode.classList.contains("author-row")?t.insertBefore(draggedAuthor,e.target.parentNode):t.appendChild(draggedAuthor)}}function submitMetadataCorrection(){if(!document.getElementById("pdfCorrectionCheck").checked){alert("Please check the box to confirm that these changes match the PDF.");return}const s=document.getElementById("paperTitle").value,n=document.getElementById("paperAbstract").value,a=document.querySelectorAll("#authorsContainer .author-row"),t=[];a.forEach(e=>{const n=e.querySelectorAll("input");t.push({first:n[0].value,last:n[1].value,id:n[2].value})});const e={anthology_id:paper_params.anthology_id};s!==paper_params.title&&(e.title=s),n!==paper_params.abstract&&(e.abstract=n);const 
i=JSON.stringify(paper_params.authors),o=JSON.stringify(t);if(o!=i&&(e.authors=t,e.authors_old=paper_params.authors.map(e=>e.first+" "+e.last).join(" | "),e.authors_new=t.map(e=>e.first+" "+e.last).join(" | ")),Object.keys(e).length===1){alert("No changes detected.");return}const r="https://github.com/acl-org/acl-anthology/issues/new?template=99-bulk-metadata-correction.yml",c="Metadata correction for 2023.acl-long.546",l="metadata,correction",d="anthology-assist",u="```json\n"+JSON.stringify(e,null,2)+"\n```",h=r+`&title=${encodeURIComponent(c)}&assignee=${encodeURIComponent(d)}&labels=${encodeURIComponent(l)}&data=`+encodeURIComponent(u);window.open(h,"_blank")}</script></body></html>

<!-- Scrape artifact preserved from snapshot trailer (pagination residue after </html>):
     Pages: 1 2 3 4 5 6 7 8 9 10 -->