<!-- Scrape/capture artifact (preserved, commented out so the doctype stays first and the page keeps standards mode): CINXE.COM -->
<!-- Scrape/capture artifact (duplicated page title from the capture header): Papers — EleutherAI -->
<!doctype html> <html xmlns:og="http://opengraphprotocol.org/schema/" xmlns:fb="http://www.facebook.com/2008/fbml" lang="en-GB" > <head> <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"> <meta name="viewport" content="width=device-width, initial-scale=1"> <!-- This is Squarespace. --><!-- tiger-goose-hwrx --> <base href=""> <meta charset="utf-8" /> <title>Papers — EleutherAI</title> <meta http-equiv="Accept-CH" content="Sec-CH-UA-Platform-Version, Sec-CH-UA-Model" /><link rel="icon" type="image/x-icon" href="https://images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/238b1c4e-e4d7-4efb-9a91-aee4af7123e4/favicon.ico?format=100w"/> <link rel="canonical" href="https://www.eleuther.ai/papers-blog"/> <meta property="og:site_name" content="EleutherAI"/> <meta property="og:title" content="Papers — EleutherAI"/> <meta property="og:url" content="https://www.eleuther.ai/papers-blog"/> <meta property="og:type" content="website"/> <meta property="og:image" content="http://static1.squarespace.com/static/6343e7de9a7c4b05ef290bd4/t/63dd715236e422768193a043/1675456850403/eleutherai+logo.png?format=1500w"/> <meta property="og:image:width" content="192"/> <meta property="og:image:height" content="218"/> <meta itemprop="name" content="Papers — EleutherAI"/> <meta itemprop="url" content="https://www.eleuther.ai/papers-blog"/> <meta itemprop="thumbnailUrl" content="http://static1.squarespace.com/static/6343e7de9a7c4b05ef290bd4/t/63dd715236e422768193a043/1675456850403/eleutherai+logo.png?format=1500w"/> <link rel="image_src" href="http://static1.squarespace.com/static/6343e7de9a7c4b05ef290bd4/t/63dd715236e422768193a043/1675456850403/eleutherai+logo.png?format=1500w" /> <meta itemprop="image" content="http://static1.squarespace.com/static/6343e7de9a7c4b05ef290bd4/t/63dd715236e422768193a043/1675456850403/eleutherai+logo.png?format=1500w"/> <meta name="twitter:title" content="Papers — EleutherAI"/> <meta name="twitter:image" 
content="http://static1.squarespace.com/static/6343e7de9a7c4b05ef290bd4/t/63dd715236e422768193a043/1675456850403/eleutherai+logo.png?format=1500w"/> <meta name="twitter:url" content="https://www.eleuther.ai/papers-blog"/> <meta name="twitter:card" content="summary"/> <meta name="description" content="" /> <link rel="preconnect" href="https://images.squarespace-cdn.com"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="stylesheet" type="text/css" href="https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,600;0,700;1,300;1,400;1,700"> <script type="text/javascript" crossorigin="anonymous" defer="true" nomodule="nomodule" src="//assets.squarespace.com/@sqs/polyfiller/1.6/legacy.js"></script> <script type="text/javascript" crossorigin="anonymous" defer="true" src="//assets.squarespace.com/@sqs/polyfiller/1.6/modern.js"></script> <script type="text/javascript">SQUARESPACE_ROLLUPS = {};</script> <script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = ["//assets.squarespace.com/universal/scripts-compressed/extract-css-runtime-801142e52956f6365b87-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-extract_css_runtime');</script> <script crossorigin="anonymous" src="//assets.squarespace.com/universal/scripts-compressed/extract-css-runtime-801142e52956f6365b87-min.en-US.js" defer ></script><script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = ["//assets.squarespace.com/universal/scripts-compressed/extract-css-moment-js-vendor-c4d588d4255cd4c70fa3-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-extract_css_moment_js_vendor');</script> <script crossorigin="anonymous" src="//assets.squarespace.com/universal/scripts-compressed/extract-css-moment-js-vendor-c4d588d4255cd4c70fa3-min.en-US.js" defer ></script><script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = 
["//assets.squarespace.com/universal/scripts-compressed/cldr-resource-pack-0c96ad3f9948c3a7b631-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-cldr_resource_pack');</script> <script crossorigin="anonymous" src="//assets.squarespace.com/universal/scripts-compressed/cldr-resource-pack-0c96ad3f9948c3a7b631-min.en-US.js" defer ></script><script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = ["//assets.squarespace.com/universal/scripts-compressed/common-vendors-stable-2965e1fc774101038a7d-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-common_vendors_stable');</script> <script crossorigin="anonymous" src="//assets.squarespace.com/universal/scripts-compressed/common-vendors-stable-2965e1fc774101038a7d-min.en-US.js" defer ></script><script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = ["//assets.squarespace.com/universal/scripts-compressed/common-vendors-d51c5ed0e7231830966d-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-common_vendors');</script> <script crossorigin="anonymous" src="//assets.squarespace.com/universal/scripts-compressed/common-vendors-d51c5ed0e7231830966d-min.en-US.js" defer ></script><script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = ["//assets.squarespace.com/universal/scripts-compressed/common-37662abdb1a756e50ded-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-common');</script> <script crossorigin="anonymous" src="//assets.squarespace.com/universal/scripts-compressed/common-37662abdb1a756e50ded-min.en-US.js" defer ></script><script>(function(rollups, name) { if (!rollups[name]) { rollups[name] = {}; } rollups[name].js = ["//assets.squarespace.com/universal/scripts-compressed/performance-7ee0cfa0e6f1539ac9e2-min.en-US.js"]; })(SQUARESPACE_ROLLUPS, 'squarespace-performance');</script> <script crossorigin="anonymous" 
src="//assets.squarespace.com/universal/scripts-compressed/performance-7ee0cfa0e6f1539ac9e2-min.en-US.js" defer ></script><script data-name="static-context">Static = window.Static || {}; Static.SQUARESPACE_CONTEXT = {"betaFeatureFlags":["nested_categories","campaigns_discount_section_in_blasts","override_block_styles","new_stacked_index","campaigns_import_discounts","campaigns_new_image_layout_picker","order_status_page_checkout_landing_enabled","collection_typename_switching","contacts_and_campaigns_redesign","campaigns_thumbnail_layout","supports_versioned_template_assets","member_areas_feature","campaigns_discount_section_in_automations","scripts_defer","i18n_beta_website_locales","themes","marketing_landing_page","marketing_automations"],"facebookAppId":"314192535267336","facebookApiVersion":"v6.0","rollups":{"squarespace-announcement-bar":{"js":"//assets.squarespace.com/universal/scripts-compressed/announcement-bar-55de1fa5f1df35771dab-min.en-US.js"},"squarespace-audio-player":{"css":"//assets.squarespace.com/universal/styles-compressed/audio-player-7273fb3c727315fb7087-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/audio-player-0b63cc8989da2584398a-min.en-US.js"},"squarespace-blog-collection-list":{"css":"//assets.squarespace.com/universal/styles-compressed/blog-collection-list-04589f0ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/blog-collection-list-3b161a94b70c22562b65-min.en-US.js"},"squarespace-calendar-block-renderer":{"css":"//assets.squarespace.com/universal/styles-compressed/calendar-block-renderer-45e10dd8aac6cdf250c5-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/calendar-block-renderer-d8cf3dcdd62428f74ed7-min.en-US.js"},"squarespace-chartjs-helpers":{"css":"//assets.squarespace.com/universal/styles-compressed/chartjs-helpers-035fad01719b3ad44ea0-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/chartjs-helpers-87bcc1cf71dc3250
6780-min.en-US.js"},"squarespace-comments":{"css":"//assets.squarespace.com/universal/styles-compressed/comments-8d82b7dcc10a49d8f3ab-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/comments-65d2f93ce7861fe887a5-min.en-US.js"},"squarespace-custom-css-popup":{"css":"//assets.squarespace.com/universal/styles-compressed/custom-css-popup-42165bae3117a373beba-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/custom-css-popup-a4b9902229f7f022d0d2-min.en-US.js"},"squarespace-dialog":{"css":"//assets.squarespace.com/universal/styles-compressed/dialog-4442f8c03c49894df614-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/dialog-2d9c48f2a7dfbf6c15d5-min.en-US.js"},"squarespace-events-collection":{"css":"//assets.squarespace.com/universal/styles-compressed/events-collection-45e10dd8aac6cdf250c5-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/events-collection-19027f381f07f32cdaf0-min.en-US.js"},"squarespace-form-rendering-utils":{"js":"//assets.squarespace.com/universal/scripts-compressed/form-rendering-utils-b0d9bba4a625e472163a-min.en-US.js"},"squarespace-forms":{"css":"//assets.squarespace.com/universal/styles-compressed/forms-561e399da7e8cd21136e-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/forms-28de246edbbda1a16dfa-min.en-US.js"},"squarespace-gallery-collection-list":{"css":"//assets.squarespace.com/universal/styles-compressed/gallery-collection-list-04589f0ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/gallery-collection-list-6a695d6f503d6f274456-min.en-US.js"},"squarespace-image-zoom":{"css":"//assets.squarespace.com/universal/styles-compressed/image-zoom-04589f0ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/image-zoom-86a4f1190d4dd0a1bb85-min.en-US.js"},"squarespace-pinterest":{"css":"//assets.squarespace.com/universal/styles-compressed/pinterest-04589f0
ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/pinterest-9212e087eaf50726f369-min.en-US.js"},"squarespace-popup-overlay":{"css":"//assets.squarespace.com/universal/styles-compressed/popup-overlay-1953548b82872bd9f153-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/popup-overlay-587967afb7a6c9edca5c-min.en-US.js"},"squarespace-product-quick-view":{"css":"//assets.squarespace.com/universal/styles-compressed/product-quick-view-cabe0d0e788aff914259-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/product-quick-view-f2d8a26b5990dab5b0e2-min.en-US.js"},"squarespace-products-collection-item-v2":{"css":"//assets.squarespace.com/universal/styles-compressed/products-collection-item-v2-04589f0ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/products-collection-item-v2-bc54407b17c1344e3754-min.en-US.js"},"squarespace-products-collection-list-v2":{"css":"//assets.squarespace.com/universal/styles-compressed/products-collection-list-v2-04589f0ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/products-collection-list-v2-320bbe708a4fc56f6d2e-min.en-US.js"},"squarespace-search-page":{"css":"//assets.squarespace.com/universal/styles-compressed/search-page-efbf373f59490877e106-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/search-page-4a288f250b4808ea00c2-min.en-US.js"},"squarespace-search-preview":{"js":"//assets.squarespace.com/universal/scripts-compressed/search-preview-ced3d433fbf9b0bf2b63-min.en-US.js"},"squarespace-simple-liking":{"css":"//assets.squarespace.com/universal/styles-compressed/simple-liking-87ea620354232c2e7b9f-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/simple-liking-b240c4039cac2761e6d0-min.en-US.js"},"squarespace-social-buttons":{"css":"//assets.squarespace.com/universal/styles-compressed/social-buttons-7430c3ff0790eafdec2c-min.en-US.c
ss","js":"//assets.squarespace.com/universal/scripts-compressed/social-buttons-aed9787d6fa93b4c7288-min.en-US.js"},"squarespace-tourdates":{"css":"//assets.squarespace.com/universal/styles-compressed/tourdates-04589f0ab778d39b6f29-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/tourdates-7f2878aec2a56b6bc77f-min.en-US.js"},"squarespace-website-overlays-manager":{"css":"//assets.squarespace.com/universal/styles-compressed/website-overlays-manager-a81f4f351e0b41eedf7d-min.en-US.css","js":"//assets.squarespace.com/universal/scripts-compressed/website-overlays-manager-614944b70c012981064b-min.en-US.js"}},"pageType":1,"website":{"id":"6343e7de9a7c4b05ef290bd4","identifier":"tiger-goose-hwrx","websiteType":1,"contentModifiedOn":1720533044594,"cloneable":false,"hasBeenCloneable":false,"siteStatus":{},"language":"en-GB","timeZone":"Europe/London","machineTimeZoneOffset":0,"timeZoneOffset":0,"timeZoneAbbr":"GMT","siteTitle":"EleutherAI","fullSiteTitle":"Papers \u2014 EleutherAI","siteDescription":"","logoImageId":"63f87ab98880e06e7b6cacb2","mobileLogoImageId":"6343f35e15a71e54bcf1ba76","socialLogoImageId":"63dd715236e422768193a043","shareButtonOptions":{"6":true,"1":true,"8":true,"2":true,"7":true,"3":true,"4":true},"logoImageUrl":"//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png","mobileLogoImageUrl":"//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png","socialLogoImageUrl":"//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/02fd467c-7db7-47c9-a6df-4c8d1cb556cc/eleutherai+logo.png","authenticUrl":"https://www.eleuther.ai","internalUrl":"https://tiger-goose-hwrx.squarespace.com","baseUrl":"https://www.eleuther.ai","primaryDomain":"www.eleuther.ai","sslSetting":3,"isHstsEnabled":true,"socialAccounts":[{"serviceId":20,"userId":"contact@eleuther.ai","screenname":"contact@eleuther.ai","added
On":1665586238169,"profileUrl":"mailto:contact@eleuther.ai","iconEnabled":true,"serviceName":"email"},{"serviceId":81,"screenname":"Discord","addedOn":1665397723993,"profileUrl":"https://discord.gg/zBGx3azzUn","iconEnabled":true,"serviceName":"discord-unauth"},{"serviceId":73,"screenname":"GitHub","addedOn":1665397752392,"profileUrl":"https://github.com/EleutherAI","iconEnabled":true,"serviceName":"github-unauth"},{"serviceId":62,"screenname":"Twitter","addedOn":1665586259904,"profileUrl":"https://twitter.com/AiEleuther","iconEnabled":true,"serviceName":"twitter-unauth"}],"typekitId":"","statsMigrated":false,"imageMetadataProcessingEnabled":false,"screenshotId":"f24832190f019a0f708e907d7090cb4b51e68b6dc324a3f5ae54bf890cc76c9a","captchaSettings":{"enabledForDonations":false},"showOwnerLogin":false},"websiteSettings":{"id":"6343e7de9a7c4b05ef290bd7","websiteId":"6343e7de9a7c4b05ef290bd4","subjects":[],"country":"GB","state":"ENG","simpleLikingEnabled":true,"mobileInfoBarSettings":{"isContactEmailEnabled":false,"isContactPhoneNumberEnabled":false,"isLocationEnabled":false,"isBusinessHoursEnabled":false},"commentLikesAllowed":true,"commentAnonAllowed":true,"commentThreaded":true,"commentApprovalRequired":false,"commentAvatarsOn":true,"commentSortType":2,"commentFlagThreshold":0,"commentFlagsAllowed":true,"commentEnableByDefault":true,"commentDisableAfterDaysDefault":0,"disqusShortname":"","commentsEnabled":false,"storeSettings":{"returnPolicy":null,"termsOfService":null,"privacyPolicy":null,"expressCheckout":false,"continueShoppingLinkUrl":"/","useLightCart":false,"showNoteField":false,"shippingCountryDefaultValue":"US","billToShippingDefaultValue":false,"showShippingPhoneNumber":true,"isShippingPhoneRequired":false,"showBillingPhoneNumber":true,"isBillingPhoneRequired":false,"currenciesSupported":["USD","CAD","GBP","AUD","EUR","CHF","NOK","SEK","DKK","NZD","SGD","MXN","HKD","CZK","ILS","MYR","RUB","PHP","PLN","THB","BRL","ARS","COP","IDR","INR","JPY","ZAR"],"defaultCur
rency":"USD","selectedCurrency":"GBP","measurementStandard":1,"showCustomCheckoutForm":false,"checkoutPageMarketingOptInEnabled":true,"enableMailingListOptInByDefault":false,"sameAsRetailLocation":false,"merchandisingSettings":{"scarcityEnabledOnProductItems":false,"scarcityEnabledOnProductBlocks":false,"scarcityMessageType":"DEFAULT_SCARCITY_MESSAGE","scarcityThreshold":10,"multipleQuantityAllowedForServices":true,"restockNotificationsEnabled":false,"restockNotificationsSuccessText":"","restockNotificationsMailingListSignUpEnabled":false,"relatedProductsEnabled":false,"relatedProductsOrdering":"random","soldOutVariantsDropdownDisabled":false,"productComposerOptedIn":false,"productComposerABTestOptedOut":false,"productReviewsEnabled":false},"minimumOrderSubtotalEnabled":false,"minimumOrderSubtotal":{"currency":"GBP","value":"0.00"},"isLive":false,"multipleQuantityAllowedForServices":true},"useEscapeKeyToLogin":false,"ssBadgeType":1,"ssBadgePosition":4,"ssBadgeVisibility":1,"ssBadgeDevices":1,"pinterestOverlayOptions":{"mode":"disabled"},"ampEnabled":false,"userAccountsSettings":{"loginAllowed":false,"signupAllowed":false}},"cookieSettings":{"isCookieBannerEnabled":false,"isRestrictiveCookiePolicyEnabled":false,"cookieBannerText":"","cookieBannerTheme":"","cookieBannerVariant":"","cookieBannerPosition":"","cookieBannerCtaVariant":"","cookieBannerCtaText":"","cookieBannerAcceptType":"OPT_IN","cookieBannerOptOutCtaText":"","cookieBannerHasOptOut":false,"cookieBannerHasManageCookies":true,"cookieBannerManageCookiesLabel":"","cookieBannerSavedPreferencesText":"","cookieBannerSavedPreferencesLayout":"PILL"},"websiteCloneable":false,"collection":{"title":"Papers","id":"63ce12ca4c8b242c83b69033","fullUrl":"/papers-blog","type":1,"permissionType":1},"subscribed":false,"appDomain":"squarespace.com","templateTweakable":true,"tweakJSON":{"form-use-theme-colors":"false","header-logo-height":"47px","header-mobile-logo-max-height":"64px","header-vert-padding":"1.2vw","header-width
":"Full","maxPageWidth":"1980px","pagePadding":"5vw","tweak-blog-alternating-side-by-side-image-aspect-ratio":"4:3 Four-Three","tweak-blog-alternating-side-by-side-image-spacing":"5%","tweak-blog-alternating-side-by-side-meta-spacing":"10px","tweak-blog-alternating-side-by-side-primary-meta":"Date","tweak-blog-alternating-side-by-side-read-more-spacing":"5px","tweak-blog-alternating-side-by-side-secondary-meta":"Categories","tweak-blog-basic-grid-columns":"2","tweak-blog-basic-grid-image-aspect-ratio":"3:2 Standard","tweak-blog-basic-grid-image-spacing":"18px","tweak-blog-basic-grid-meta-spacing":"0px","tweak-blog-basic-grid-primary-meta":"Categories","tweak-blog-basic-grid-read-more-spacing":"0px","tweak-blog-basic-grid-secondary-meta":"None","tweak-blog-item-custom-width":"60","tweak-blog-item-show-author-profile":"false","tweak-blog-item-width":"Medium","tweak-blog-masonry-columns":"3","tweak-blog-masonry-horizontal-spacing":"60px","tweak-blog-masonry-image-spacing":"30px","tweak-blog-masonry-meta-spacing":"10px","tweak-blog-masonry-primary-meta":"Categories","tweak-blog-masonry-read-more-spacing":"5px","tweak-blog-masonry-secondary-meta":"None","tweak-blog-masonry-vertical-spacing":"120px","tweak-blog-side-by-side-image-aspect-ratio":"1:1 Square","tweak-blog-side-by-side-image-spacing":"4%","tweak-blog-side-by-side-meta-spacing":"0px","tweak-blog-side-by-side-primary-meta":"Categories","tweak-blog-side-by-side-read-more-spacing":"5px","tweak-blog-side-by-side-secondary-meta":"Date","tweak-blog-single-column-image-spacing":"40px","tweak-blog-single-column-meta-spacing":"4px","tweak-blog-single-column-primary-meta":"Date","tweak-blog-single-column-read-more-spacing":"10px","tweak-blog-single-column-secondary-meta":"Categories","tweak-events-stacked-show-thumbnails":"true","tweak-events-stacked-thumbnail-size":"3:2 
Standard","tweak-fixed-header":"true","tweak-fixed-header-style":"Basic","tweak-global-animations-animation-curve":"ease","tweak-global-animations-animation-delay":"1.0s","tweak-global-animations-animation-duration":"0.65s","tweak-global-animations-animation-style":"fade","tweak-global-animations-animation-type":"flex","tweak-global-animations-complexity-level":"basic","tweak-global-animations-enabled":"true","tweak-portfolio-grid-basic-custom-height":"50","tweak-portfolio-grid-overlay-custom-height":"50","tweak-portfolio-hover-follow-acceleration":"10%","tweak-portfolio-hover-follow-animation-duration":"Fast","tweak-portfolio-hover-follow-animation-type":"Fade","tweak-portfolio-hover-follow-delimiter":"Bullet","tweak-portfolio-hover-follow-front":"false","tweak-portfolio-hover-follow-layout":"Inline","tweak-portfolio-hover-follow-size":"50","tweak-portfolio-hover-follow-text-spacing-x":"1.5","tweak-portfolio-hover-follow-text-spacing-y":"1.5","tweak-portfolio-hover-static-animation-duration":"Fast","tweak-portfolio-hover-static-animation-type":"Fade","tweak-portfolio-hover-static-delimiter":"Hyphen","tweak-portfolio-hover-static-front":"true","tweak-portfolio-hover-static-layout":"Inline","tweak-portfolio-hover-static-size":"50","tweak-portfolio-hover-static-text-spacing-x":"1.5","tweak-portfolio-hover-static-text-spacing-y":"1.5","tweak-portfolio-index-background-animation-duration":"Medium","tweak-portfolio-index-background-animation-type":"Fade","tweak-portfolio-index-background-custom-height":"50","tweak-portfolio-index-background-delimiter":"None","tweak-portfolio-index-background-height":"Large","tweak-portfolio-index-background-horizontal-alignment":"Center","tweak-portfolio-index-background-link-format":"Stacked","tweak-portfolio-index-background-persist":"false","tweak-portfolio-index-background-vertical-alignment":"Middle","tweak-portfolio-index-background-width":"Full 
Bleed","tweak-product-basic-item-click-action":"None","tweak-product-basic-item-gallery-aspect-ratio":"3:4 Three-Four (Vertical)","tweak-product-basic-item-gallery-design":"Slideshow","tweak-product-basic-item-gallery-width":"50%","tweak-product-basic-item-hover-action":"None","tweak-product-basic-item-image-spacing":"3vw","tweak-product-basic-item-image-zoom-factor":"1.75","tweak-product-basic-item-product-variant-display":"Dropdown","tweak-product-basic-item-thumbnail-placement":"Side","tweak-product-basic-item-variant-picker-layout":"Dropdowns","tweak-products-add-to-cart-button":"false","tweak-products-columns":"3","tweak-products-gutter-column":"2vw","tweak-products-gutter-row":"3vw","tweak-products-header-text-alignment":"Middle","tweak-products-image-aspect-ratio":"1:1 Square","tweak-products-image-text-spacing":"1vw","tweak-products-mobile-columns":"1","tweak-products-text-alignment":"Left","tweak-products-width":"Inset","tweak-transparent-header":"false"},"templateId":"5c5a519771c10ba3470d8101","templateVersion":"7.1","pageFeatures":[1,2,4],"gmRenderKey":"QUl6YVN5Q0JUUk9xNkx1dkZfSUUxcjQ2LVQ0QWVUU1YtMGQ3bXk4","templateScriptsRootUrl":"https://static1.squarespace.com/static/vta/5c5a519771c10ba3470d8101/scripts/","impersonatedSession":false,"tzData":{"zones":[[0,"EU","GMT/BST",null]],"rules":{"EU":[[1981,"max",null,"Mar","lastSun","1:00u","1:00","S"],[1996,"max",null,"Oct","lastSun","1:00u","0",null]]}},"showAnnouncementBar":false,"recaptchaEnterpriseContext":{"recaptchaEnterpriseSiteKey":"6LdDFQwjAAAAAPigEvvPgEVbb7QBm-TkVJdDTlAv"},"i18nContext":{"timeZoneData":{"id":"Europe/London","name":"Greenwich Mean Time"}},"env":"PRODUCTION"};</script><link rel="alternate" type="application/rss+xml" title="RSS Feed" href="https://www.eleuther.ai/papers-blog?format=rss" /> <script 
type="application/ld+json">{"url":"https://www.eleuther.ai","name":"EleutherAI","description":"","image":"//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png","@context":"http://schema.org","@type":"WebSite"}</script><link rel="stylesheet" type="text/css" href="https://static1.squarespace.com/static/versioned-site-css/6343e7de9a7c4b05ef290bd4/208/5c5a519771c10ba3470d8101/6343e7de9a7c4b05ef290bee/1614/site.css"/><!-- #1: Load MathJax--> <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"> </script> <!-- The following scripts only work AFTER the script that loads MathJax --> <!-- #2: Enable equation numbers via \begin{equation}. --> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ TeX: { equationNumbers: { autoNumber: "AMS" } } }); </script> <!-- #3: Enable proper inline equation via $, and also process escape characters (e.g. \$). --> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']], processEscapes:true} }); </script> <!-- #4: Create a MathJax macro that reset's the equation auto-numbers. 
Thanks to https://github.com/mathjax/MathJax/issues/1294 --> <script type="text/x-mathjax-config"> MathJax.InputJax.TeX.Definitions.Add({ macros: { setCounter: "setCounter" } }, null, true); MathJax.InputJax.TeX.Parse.Augment({ setCounter: function(name) { var num = parseInt(this.GetArgument(name)); MathJax.Extension["TeX/AMSmath"].number = num; } }); </script> <!-- Fixed: rel attribute was corrupted by smart quotes (rel="“stylesheet""), which made browsers ignore the highlight.js theme --> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.10/styles/monokai-sublime.min.css"> <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.10/highlight.min.js"></script> <!-- Fixed: hljs.highlightAll() only exists in highlight.js >= 10.6; the v9 API is initHighlightingOnLoad() --> <script>hljs.initHighlightingOnLoad();</script><script>Static.COOKIE_BANNER_CAPABLE = true;</script> <!-- End of Squarespace Headers --> <link rel="stylesheet" type="text/css" href="https://static1.squarespace.com/static/vta/5c5a519771c10ba3470d8101/versioned-assets/1739381320286-8FJE7NMLDZWANM5PXDLU/static.css"> </head> <body id="collection-63ce12ca4c8b242c83b69033" class=" primary-button-style-outline primary-button-shape-pill secondary-button-style-outline secondary-button-shape-underline tertiary-button-style-solid tertiary-button-shape-pill form-field-style-solid form-field-shape-square form-field-border-all form-field-checkbox-type-icon form-field-checkbox-fill-solid form-field-checkbox-color-inverted form-field-checkbox-shape-square form-field-checkbox-layout-stack form-field-radio-type-icon form-field-radio-fill-solid form-field-radio-color-normal form-field-radio-shape-pill form-field-radio-layout-stack form-field-survey-fill-solid form-field-survey-color-normal form-field-survey-shape-pill form-field-hover-focus-outline form-submit-button-style-label header-overlay-alignment-center header-width-full tweak-fixed-header tweak-fixed-header-style-basic tweak-blog-alternating-side-by-side-width-inset tweak-blog-alternating-side-by-side-image-aspect-ratio-43-four-three tweak-blog-alternating-side-by-side-text-alignment-left tweak-blog-alternating-side-by-side-read-more-style-hide 
tweak-blog-alternating-side-by-side-image-text-alignment-top tweak-blog-alternating-side-by-side-delimiter-bullet tweak-blog-alternating-side-by-side-meta-position-below-excerpt tweak-blog-alternating-side-by-side-primary-meta-date tweak-blog-alternating-side-by-side-secondary-meta-categories tweak-blog-alternating-side-by-side-excerpt-show tweak-blog-basic-grid-width-inset tweak-blog-basic-grid-image-aspect-ratio-32-standard tweak-blog-basic-grid-text-alignment-left tweak-blog-basic-grid-delimiter-dash tweak-blog-basic-grid-image-placement-above tweak-blog-basic-grid-read-more-style-hide tweak-blog-basic-grid-primary-meta-categories tweak-blog-basic-grid-secondary-meta-none tweak-blog-basic-grid-excerpt-show tweak-blog-item-width-medium tweak-blog-item-text-alignment-left tweak-blog-item-meta-position-below-title tweak-blog-item-delimiter-dash tweak-blog-masonry-width-full tweak-blog-masonry-text-alignment-left tweak-blog-masonry-primary-meta-categories tweak-blog-masonry-secondary-meta-none tweak-blog-masonry-meta-position-below-excerpt tweak-blog-masonry-read-more-style-hide tweak-blog-masonry-delimiter-bullet tweak-blog-masonry-image-placement-above tweak-blog-masonry-excerpt-hide tweak-blog-side-by-side-width-full tweak-blog-side-by-side-image-placement-left tweak-blog-side-by-side-image-aspect-ratio-11-square tweak-blog-side-by-side-primary-meta-categories tweak-blog-side-by-side-secondary-meta-date tweak-blog-side-by-side-meta-position-top tweak-blog-side-by-side-text-alignment-left tweak-blog-side-by-side-image-text-alignment-middle tweak-blog-side-by-side-read-more-style-show tweak-blog-side-by-side-delimiter-bullet tweak-blog-side-by-side-excerpt-show tweak-blog-single-column-width-full tweak-blog-single-column-text-alignment-left tweak-blog-single-column-image-placement-above tweak-blog-single-column-delimiter-bullet tweak-blog-single-column-read-more-style-show tweak-blog-single-column-primary-meta-date tweak-blog-single-column-secondary-meta-categories 
tweak-blog-single-column-meta-position-below-excerpt tweak-blog-single-column-content-excerpt-and-title tweak-events-stacked-width-inset tweak-events-stacked-height-small tweak-events-stacked-show-past-events tweak-events-stacked-show-thumbnails tweak-events-stacked-thumbnail-size-32-standard tweak-events-stacked-date-style-side-tag tweak-events-stacked-show-time tweak-events-stacked-show-location tweak-events-stacked-ical-gcal-links tweak-events-stacked-show-excerpt tweak-global-animations-enabled tweak-global-animations-complexity-level-basic tweak-global-animations-animation-style-fade tweak-global-animations-animation-type-flex tweak-global-animations-animation-curve-ease tweak-portfolio-grid-basic-width-full tweak-portfolio-grid-basic-height-small tweak-portfolio-grid-basic-image-aspect-ratio-43-four-three tweak-portfolio-grid-basic-text-alignment-left tweak-portfolio-grid-basic-hover-effect-zoom tweak-portfolio-grid-overlay-width-inset tweak-portfolio-grid-overlay-height-small tweak-portfolio-grid-overlay-image-aspect-ratio-43-four-three tweak-portfolio-grid-overlay-text-placement-center tweak-portfolio-grid-overlay-show-text-after-hover tweak-portfolio-index-background-link-format-stacked tweak-portfolio-index-background-width-full-bleed tweak-portfolio-index-background-height-large tweak-portfolio-index-background-vertical-alignment-middle tweak-portfolio-index-background-horizontal-alignment-center tweak-portfolio-index-background-delimiter-none tweak-portfolio-index-background-animation-type-fade tweak-portfolio-index-background-animation-duration-medium tweak-portfolio-hover-follow-layout-inline tweak-portfolio-hover-follow-delimiter-bullet tweak-portfolio-hover-follow-animation-type-fade tweak-portfolio-hover-follow-animation-duration-fast tweak-portfolio-hover-static-layout-inline tweak-portfolio-hover-static-front tweak-portfolio-hover-static-delimiter-hyphen tweak-portfolio-hover-static-animation-type-fade 
tweak-portfolio-hover-static-animation-duration-fast tweak-product-basic-item-product-variant-display-dropdown tweak-product-basic-item-product-subscription-display-radio tweak-product-basic-item-product-subscription-border-shape-square tweak-product-basic-item-width-full tweak-product-basic-item-gallery-aspect-ratio-34-three-four-vertical tweak-product-basic-item-text-alignment-left tweak-product-basic-item-navigation-breadcrumbs tweak-product-basic-item-description-position-below-price tweak-product-basic-item-description-position-mobile-below-add-to-cart-button tweak-product-basic-item-content-alignment-top tweak-product-basic-item-gallery-design-slideshow tweak-product-basic-item-gallery-placement-left tweak-product-basic-item-thumbnail-placement-side tweak-product-basic-item-click-action-none tweak-product-basic-item-hover-action-none tweak-product-basic-item-variant-picker-layout-dropdowns tweak-product-basic-item-add-to-cart-standalone tweak-product-basic-item-add-to-cart-mobile-standalone tweak-products-width-inset tweak-products-image-aspect-ratio-11-square tweak-products-text-alignment-left tweak-products-price-show tweak-products-nested-category-type-top tweak-products-category-title tweak-products-header-text-alignment-middle tweak-products-breadcrumbs image-block-poster-text-alignment-center image-block-card-content-position-center image-block-card-text-alignment-left image-block-overlap-content-position-center image-block-overlap-text-alignment-left image-block-collage-content-position-top image-block-collage-text-alignment-left image-block-stack-text-alignment-left hide-opentable-icons opentable-style-dark tweak-product-quick-view-button-style-floating tweak-product-quick-view-button-position-bottom tweak-product-quick-view-lightbox-excerpt-display-truncate tweak-product-quick-view-lightbox-show-arrows tweak-product-quick-view-lightbox-show-close-button tweak-product-quick-view-lightbox-controls-weight-light native-currency-code-gbp 
collection-type-blog-single-column collection-layout-default collection-63ce12ca4c8b242c83b69033 view-list mobile-style-available sqs-seven-one " tabindex="-1" > <div id="siteWrapper" class="clearfix site-wrapper" > <div id="floatingCart" class="floating-cart hidden"> <a href="/cart" class="icon icon--stroke icon--fill icon--cart sqs-custom-cart"> <span class="Cart-inner"> <svg class="icon icon--cart" viewBox="0 0 31 24"> <g class="svg-icon cart-icon--odd"> <circle fill="none" stroke-miterlimit="10" cx="22.5" cy="21.5" r="1"/> <circle fill="none" stroke-miterlimit="10" cx="9.5" cy="21.5" r="1"/> <path fill="none" stroke-miterlimit="10" d="M0,1.5h5c0.6,0,1.1,0.4,1.1,1l1.7,13 c0.1,0.5,0.6,1,1.1,1h15c0.5,0,1.2-0.4,1.4-0.9l3.3-8.1c0.2-0.5-0.1-0.9-0.6-0.9H12"/> </g> </svg> <div class="legacy-cart icon-cart-quantity"> <span class="sqs-cart-quantity">0</span> </div> </span> </a> </div> <header data-test="header" id="header" class=" black-bold header theme-col--primary " data-section-theme="black-bold" data-controller="Header" data-current-styles="{ "layout": "navRight", "action": { "buttonText": "Get Started", "newWindow": false }, "showSocial": true, "socialOptions": { "socialBorderShape": "none", "socialBorderStyle": "outline", "socialBorderThickness": { "unit": "px", "value": 1.0 } }, "sectionTheme": "black-bold", "menuOverlayTheme": "black", "menuOverlayAnimation": "fade", "cartStyle": "cart", "cartText": "Cart", "showEmptyCartState": true, "cartOptions": { "iconType": "stroke-1", "cartBorderShape": "none", "cartBorderStyle": "outline", "cartBorderThickness": { "unit": "px", "value": 1.0 } }, "showButton": false, "showCart": false, "showAccountLogin": true, "headerStyle": "solid", "languagePicker": { "enabled": false, "iconEnabled": false, "iconType": "globe", "flagShape": "shiny", "languageFlags": [ ] }, "iconOptions": { "thickness": { "unit": "px", "value": 2.0 }, "endcapType": "square", "desktopDropdownIconOptions": { "folderIcon": "none", "languagePickerIcon": 
"openArrowHead" }, "mobileDropdownIconOptions": { "folderIcon": "none", "languagePickerIcon": "openArrowHead" } }, "mobileOptions": { "layout": "logoLeftNavRight", "menuIconOptions": { "style": "halfLineHamburger", "thickness": { "unit": "px", "value": 2.0 } } }, "solidOptions": { "headerOpacity": { "unit": "%", "value": 100.0 }, "blurBackground": { "enabled": false, "blurRadius": { "unit": "px", "value": 12.0 } }, "backgroundColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "black", "alphaModifier": 1.0 } }, "navigationColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "accent", "alphaModifier": 1.0 } } }, "gradientOptions": { "gradientType": "faded", "headerOpacity": { "unit": "%", "value": 90.0 }, "blurBackground": { "enabled": false, "blurRadius": { "unit": "px", "value": 12.0 } }, "backgroundColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "white", "alphaModifier": 1.0 } }, "navigationColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "black", "alphaModifier": 1.0 } } }, "dropShadowOptions": { "enabled": false, "blur": { "unit": "px", "value": 30.0 }, "spread": { "unit": "px", "value": 0.0 }, "distance": { "unit": "px", "value": 0.0 }, "color": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "black", "alphaModifier": 1.0 } } }, "borderOptions": { "enabled": false, "position": "bottom", "thickness": { "unit": "px", "value": 1.0 }, "color": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "lightAccent", "alphaModifier": 1.0 } } }, "showPromotedElement": false, "buttonVariant": "primary", "blurBackground": { "enabled": false, "blurRadius": { "unit": "px", "value": 12.0 } }, "headerOpacity": { "unit": "%", "value": 100.0 } }" data-section-id="header" data-header-style="solid" data-language-picker="{ "enabled": false, "iconEnabled": false, "iconType": "globe", "flagShape": "shiny", "languageFlags": [ ] }" 
data-first-focusable-element tabindex="-1" style=" --headerDropShadowColor: hsla(var(--black-hsl), 1); --headerBorderColor: hsla(var(--lightAccent-hsl), 1); --solidHeaderBackgroundColor: hsla(var(--black-hsl), 1); --solidHeaderNavigationColor: hsla(var(--accent-hsl), 1); --gradientHeaderBackgroundColor: hsla(var(--white-hsl), 1); --gradientHeaderNavigationColor: hsla(var(--black-hsl), 1); " > <div class="sqs-announcement-bar-dropzone"></div> <div class="header-announcement-bar-wrapper"> <a href="#page" class="header-skip-link sqs-button-element--primary" > Skip to Content </a> <style> @supports (-webkit-backdrop-filter: none) or (backdrop-filter: none) { .header-blur-background { -webkit-backdrop-filter: blur(12px); backdrop-filter: blur(12px); } } </style> <div class="header-border" data-header-style="solid" data-header-border="false" data-test="header-border" style=" " ></div> <div class="header-dropshadow" data-header-style="solid" data-header-dropshadow="false" data-test="header-dropshadow" style="" ></div> <div > <div class="header-background-solid" data-header-style="solid" data-test="header-background-solid" style="opacity: calc(100 * .01)" ></div> </div> <div class='header-inner container--fluid header-mobile-layout-logo-left-nav-right header-layout-nav-right ' data-test="header-inner" > <!-- Background --> <div class="header-background theme-bg--primary"></div> <div class="header-display-desktop" data-content-field="site-title"> <!-- Social --> <!-- Title and nav wrapper --> <div class="header-title-nav-wrapper"> <!-- Title --> <div class=" header-title header-title--use-mobile-logo " data-animation-role="header-element" > <div class="header-title-logo"> <a href="/" data-animation-role="header-element"> <picture><source media="only screen and (pointer: coarse) and (max-width: 1024px), screen and (max-width: 799px)" 
srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png?format=1500w"><source media="only screen and (pointer: coarse) and (min-width: 1025px), screen and (min-width: 800px)" srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png?format=1500w"> <img elementtiming="nbf-header-logo-desktop" src="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png?format=1500w" alt="EleutherAI" style="display:block" fetchpriority="high" loading="eager" decoding="async" data-loader="raw"></picture> </a> </div> <div class="header-mobile-logo"> <a href="/" data-animation-role="header-element"> <picture><source media="only screen and (pointer: coarse) and (max-width: 1024px), screen and (max-width: 799px)" srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png?format=1500w"><source media="only screen and (pointer: coarse) and (min-width: 1025px), screen and (min-width: 800px)" srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png?format=1500w"> <img elementtiming="nbf-header-logo-mobile" src="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png?format=1500w" alt="EleutherAI" style="display:block" fetchpriority="high" loading="eager" decoding="async" data-loader="raw"></picture> </a> </div> </div> <!-- Nav --> <div class="header-nav"> <div class="header-nav-wrapper"> <nav class="header-nav-list"> <div class="header-nav-item header-nav-item--folder"> <a class="header-nav-folder-title" href="/about-folder/" tabindex="-1" data-animation-role="header-element" > About </a> <div class="header-nav-folder-content"> <div 
class="header-nav-folder-item"> <a href="/community" > <span class="header-nav-folder-item-content"> Community </span> </a> </div> <div class="header-nav-folder-item"> <a href="/staff" > <span class="header-nav-folder-item-content"> Staff </span> </a> </div> </div> </div> <div class="header-nav-item header-nav-item--folder"> <a class="header-nav-folder-title" href="/research-folder/" tabindex="-1" data-animation-role="header-element" > Research </a> <div class="header-nav-folder-content"> <div class="header-nav-folder-item"> <a href="/language-modeling" > <span class="header-nav-folder-item-content"> Language Modeling </span> </a> </div> <div class="header-nav-folder-item"> <a href="/interpretability" > <span class="header-nav-folder-item-content"> Interpretability </span> </a> </div> <div class="header-nav-folder-item"> <a href="/alignment" > <span class="header-nav-folder-item-content"> Alignment </span> </a> </div> </div> </div> <div class="header-nav-item header-nav-item--collection"> <a href="/papers" data-animation-role="header-element" > Papers </a> </div> <div class="header-nav-item header-nav-item--collection"> <a href="/releases" data-animation-role="header-element" > Releases </a> </div> <div class="header-nav-item header-nav-item--external"> <a href="https://blog.eleuther.ai" data-animation-role="header-element">Blog</a> </div> </nav> </div> </div> </div> <!-- Actions --> <div class="header-actions header-actions--right"> <div class="header-actions-action header-actions-action--social"> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="mailto:contact@eleuther.ai" target="_blank" aria-label="contact@eleuther.ai"> <svg viewBox="23 23 64 64"> <use xlink:href="#email-icon" width="110" height="110"></use> </svg> </a> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://discord.gg/zBGx3azzUn" target="_blank" aria-label="Discord"> <svg 
viewBox="23 23 64 64"> <use xlink:href="#discord-unauth-icon" width="110" height="110"></use> </svg> </a> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://github.com/EleutherAI" target="_blank" aria-label="GitHub"> <svg viewBox="23 23 64 64"> <use xlink:href="#github-unauth-icon" width="110" height="110"></use> </svg> </a> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://twitter.com/AiEleuther" target="_blank" aria-label="Twitter"> <svg viewBox="23 23 64 64"> <use xlink:href="#twitter-unauth-icon" width="110" height="110"></use> </svg> </a> </div> <div class="showOnMobile"> </div> <div class="showOnDesktop"> </div> </div> <style> .top-bun, .patty, .bottom-bun { height: 2px; } </style> <!-- Burger --> <div class="header-burger menu-overlay-has-visible-non-navigation-items " data-animation-role="header-element"> <button class="header-burger-btn burger" data-test="header-burger"> <span hidden class="js-header-burger-open-title visually-hidden">Open Menu</span> <span hidden class="js-header-burger-close-title visually-hidden">Close Menu</span> <div class="burger-box"> <div class="burger-inner header-menu-icon-halfLineHamburger navRight"> <div class="top-bun"></div> <div class="patty"></div> <div class="bottom-bun"></div> </div> </div> </button> </div> </div> <div class="header-display-mobile" data-content-field="site-title"> <!-- Social --> <!-- Title and nav wrapper --> <div class="header-title-nav-wrapper"> <!-- Title --> <div class=" header-title header-title--use-mobile-logo " data-animation-role="header-element" > <div class="header-title-logo"> <a href="/" data-animation-role="header-element"> <picture><source media="only screen and (pointer: coarse) and (max-width: 1024px), screen and (max-width: 799px)" 
srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png?format=1500w"><source media="only screen and (pointer: coarse) and (min-width: 1025px), screen and (min-width: 800px)" srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png?format=1500w"> <img elementtiming="nbf-header-logo-desktop" src="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png?format=1500w" alt="EleutherAI" style="display:block" fetchpriority="high" loading="eager" decoding="async" data-loader="raw"></picture> </a> </div> <div class="header-mobile-logo"> <a href="/" data-animation-role="header-element"> <picture><source media="only screen and (pointer: coarse) and (max-width: 1024px), screen and (max-width: 799px)" srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png?format=1500w"><source media="only screen and (pointer: coarse) and (min-width: 1025px), screen and (min-width: 800px)" srcset="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/130220ca-617d-4834-b7ab-d0b3bc6a4668/eleutherai+full+logo+6.png?format=1500w"> <img elementtiming="nbf-header-logo-mobile" src="//images.squarespace-cdn.com/content/v1/6343e7de9a7c4b05ef290bd4/b89e8bc1-32b7-4a97-8dac-351af3d221ed/eleutherai+logo.png?format=1500w" alt="EleutherAI" style="display:block" fetchpriority="high" loading="eager" decoding="async" data-loader="raw"></picture> </a> </div> </div> <!-- Nav --> <div class="header-nav"> <div class="header-nav-wrapper"> <nav class="header-nav-list"> <div class="header-nav-item header-nav-item--folder"> <a class="header-nav-folder-title" href="/about-folder/" tabindex="-1" data-animation-role="header-element" > About </a> <div class="header-nav-folder-content"> <div 
class="header-nav-folder-item"> <a href="/community" > <span class="header-nav-folder-item-content"> Community </span> </a> </div> <div class="header-nav-folder-item"> <a href="/staff" > <span class="header-nav-folder-item-content"> Staff </span> </a> </div> </div> </div> <div class="header-nav-item header-nav-item--folder"> <a class="header-nav-folder-title" href="/research-folder/" tabindex="-1" data-animation-role="header-element" > Research </a> <div class="header-nav-folder-content"> <div class="header-nav-folder-item"> <a href="/language-modeling" > <span class="header-nav-folder-item-content"> Language Modeling </span> </a> </div> <div class="header-nav-folder-item"> <a href="/interpretability" > <span class="header-nav-folder-item-content"> Interpretability </span> </a> </div> <div class="header-nav-folder-item"> <a href="/alignment" > <span class="header-nav-folder-item-content"> Alignment </span> </a> </div> </div> </div> <div class="header-nav-item header-nav-item--collection"> <a href="/papers" data-animation-role="header-element" > Papers </a> </div> <div class="header-nav-item header-nav-item--collection"> <a href="/releases" data-animation-role="header-element" > Releases </a> </div> <div class="header-nav-item header-nav-item--external"> <a href="https://blog.eleuther.ai" data-animation-role="header-element">Blog</a> </div> </nav> </div> </div> </div> <!-- Actions --> <div class="header-actions header-actions--right"> <div class="header-actions-action header-actions-action--social"> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="mailto:contact@eleuther.ai" target="_blank" aria-label="contact@eleuther.ai"> <svg viewBox="23 23 64 64"> <use xlink:href="#email-icon" width="110" height="110"></use> </svg> </a> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://discord.gg/zBGx3azzUn" target="_blank" aria-label="Discord"> <svg 
viewBox="23 23 64 64"> <use xlink:href="#discord-unauth-icon" width="110" height="110"></use> </svg> </a> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://github.com/EleutherAI" target="_blank" aria-label="GitHub"> <svg viewBox="23 23 64 64"> <use xlink:href="#github-unauth-icon" width="110" height="110"></use> </svg> </a> <a class="icon icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://twitter.com/AiEleuther" target="_blank" aria-label="Twitter"> <svg viewBox="23 23 64 64"> <use xlink:href="#twitter-unauth-icon" width="110" height="110"></use> </svg> </a> </div> <div class="showOnMobile"> </div> <div class="showOnDesktop"> </div> </div> <style> .top-bun, .patty, .bottom-bun { height: 2px; } </style> <!-- Burger --> <div class="header-burger menu-overlay-has-visible-non-navigation-items " data-animation-role="header-element"> <button class="header-burger-btn burger" data-test="header-burger"> <span hidden class="js-header-burger-open-title visually-hidden">Open Menu</span> <span hidden class="js-header-burger-close-title visually-hidden">Close Menu</span> <div class="burger-box"> <div class="burger-inner header-menu-icon-halfLineHamburger navRight"> <div class="top-bun"></div> <div class="patty"></div> <div class="bottom-bun"></div> </div> </div> </button> </div> </div> </div> </div> <!-- (Mobile) Menu Navigation --> <div class="header-menu header-menu--folder-list black " data-section-theme="black" data-current-styles="{ "layout": "navRight", "action": { "buttonText": "Get Started", "newWindow": false }, "showSocial": true, "socialOptions": { "socialBorderShape": "none", "socialBorderStyle": "outline", "socialBorderThickness": { "unit": "px", "value": 1.0 } }, "sectionTheme": "black-bold", "menuOverlayTheme": "black", "menuOverlayAnimation": "fade", "cartStyle": "cart", "cartText": "Cart", "showEmptyCartState": true, "cartOptions": { "iconType": 
"stroke-1", "cartBorderShape": "none", "cartBorderStyle": "outline", "cartBorderThickness": { "unit": "px", "value": 1.0 } }, "showButton": false, "showCart": false, "showAccountLogin": true, "headerStyle": "solid", "languagePicker": { "enabled": false, "iconEnabled": false, "iconType": "globe", "flagShape": "shiny", "languageFlags": [ ] }, "iconOptions": { "thickness": { "unit": "px", "value": 2.0 }, "endcapType": "square", "desktopDropdownIconOptions": { "folderIcon": "none", "languagePickerIcon": "openArrowHead" }, "mobileDropdownIconOptions": { "folderIcon": "none", "languagePickerIcon": "openArrowHead" } }, "mobileOptions": { "layout": "logoLeftNavRight", "menuIconOptions": { "style": "halfLineHamburger", "thickness": { "unit": "px", "value": 2.0 } } }, "solidOptions": { "headerOpacity": { "unit": "%", "value": 100.0 }, "blurBackground": { "enabled": false, "blurRadius": { "unit": "px", "value": 12.0 } }, "backgroundColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "black", "alphaModifier": 1.0 } }, "navigationColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "accent", "alphaModifier": 1.0 } } }, "gradientOptions": { "gradientType": "faded", "headerOpacity": { "unit": "%", "value": 90.0 }, "blurBackground": { "enabled": false, "blurRadius": { "unit": "px", "value": 12.0 } }, "backgroundColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "white", "alphaModifier": 1.0 } }, "navigationColor": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "black", "alphaModifier": 1.0 } } }, "dropShadowOptions": { "enabled": false, "blur": { "unit": "px", "value": 30.0 }, "spread": { "unit": "px", "value": 0.0 }, "distance": { "unit": "px", "value": 0.0 }, "color": { "type": "SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "black", "alphaModifier": 1.0 } } }, "borderOptions": { "enabled": false, "position": "bottom", "thickness": { "unit": "px", "value": 1.0 }, "color": { "type": 
"SITE_PALETTE_COLOR", "sitePaletteColor": { "colorName": "lightAccent", "alphaModifier": 1.0 } } }, "showPromotedElement": false, "buttonVariant": "primary", "blurBackground": { "enabled": false, "blurRadius": { "unit": "px", "value": 12.0 } }, "headerOpacity": { "unit": "%", "value": 100.0 } }" data-section-id="overlay-nav" data-show-account-login="true" data-test="header-menu"> <div class="header-menu-bg theme-bg--primary"></div> <div class="header-menu-nav"> <nav class="header-menu-nav-list"> <div data-folder="root" class="header-menu-nav-folder"> <div class="header-menu-nav-folder-content"> <!-- Menu Navigation --> <div class="header-menu-nav-wrapper"> <div class="container header-menu-nav-item"> <a data-folder-id="/about-folder/" href="/about-folder/" > <div class="header-menu-nav-item-content"> <span class="visually-hidden">Folder:</span> <span>About</span> <span class="chevron chevron--right"></span> </div> </a> </div> <div data-folder="/about-folder/" class="header-menu-nav-folder"> <div class="header-menu-nav-folder-content"> <div class="header-menu-controls container header-menu-nav-item"> <a class="header-menu-controls-control header-menu-controls-control--active" data-action="back" href="/"> <span class="chevron chevron--left"></span><span>Back</span> </a> </div> <div class="container header-menu-nav-item"> <a href="/community" > <div class="header-menu-nav-item-content"> Community </div> </a> </div> <div class="container header-menu-nav-item"> <a href="/staff" > <div class="header-menu-nav-item-content"> Staff </div> </a> </div> </div> </div> <div class="container header-menu-nav-item"> <a data-folder-id="/research-folder/" href="/research-folder/" > <div class="header-menu-nav-item-content"> <span class="visually-hidden">Folder:</span> <span>Research</span> <span class="chevron chevron--right"></span> </div> </a> </div> <div data-folder="/research-folder/" class="header-menu-nav-folder"> <div class="header-menu-nav-folder-content"> <div 
class="header-menu-controls container header-menu-nav-item"> <a class="header-menu-controls-control header-menu-controls-control--active" data-action="back" href="/"> <span class="chevron chevron--left"></span><span>Back</span> </a> </div> <div class="container header-menu-nav-item"> <a href="/language-modeling" > <div class="header-menu-nav-item-content"> Language Modeling </div> </a> </div> <div class="container header-menu-nav-item"> <a href="/interpretability" > <div class="header-menu-nav-item-content"> Interpretability </div> </a> </div> <div class="container header-menu-nav-item"> <a href="/alignment" > <div class="header-menu-nav-item-content"> Alignment </div> </a> </div> </div> </div> <div class="container header-menu-nav-item header-menu-nav-item--collection"> <a href="/papers" > <div class="header-menu-nav-item-content"> Papers </div> </a> </div> <div class="container header-menu-nav-item header-menu-nav-item--collection"> <a href="/releases" > <div class="header-menu-nav-item-content"> Releases </div> </a> </div> <div class="container header-menu-nav-item header-menu-nav-item--external"> <a href="https://blog.eleuther.ai" >Blog</a> </div> </div> </div> <div class="header-menu-actions social-accounts"> <div class="header-menu-actions-action header-menu-actions-action--social mobile"> <a class="icon icon--lg icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="mailto:contact@eleuther.ai" target="_blank" aria-label="contact@eleuther.ai"> <svg viewBox="23 23 64 64"> <use xlink:href="#email-icon" width="110" height="110"></use> </svg> </a> </div> <div class="header-menu-actions-action header-menu-actions-action--social mobile"> <a class="icon icon--lg icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://discord.gg/zBGx3azzUn" target="_blank" aria-label="Discord"> <svg viewBox="23 23 64 64"> <use xlink:href="#discord-unauth-icon" width="110" height="110"></use> </svg> 
</a> </div> <div class="header-menu-actions-action header-menu-actions-action--social mobile"> <a class="icon icon--lg icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://github.com/EleutherAI" target="_blank" aria-label="GitHub"> <svg viewBox="23 23 64 64"> <use xlink:href="#github-unauth-icon" width="110" height="110"></use> </svg> </a> </div> <div class="header-menu-actions-action header-menu-actions-action--social mobile"> <a class="icon icon--lg icon--fill header-icon header-icon-border-shape-none header-icon-border-style-outline" href="https://twitter.com/AiEleuther" target="_blank" aria-label="Twitter"> <svg viewBox="23 23 64 64"> <use xlink:href="#twitter-unauth-icon" width="110" height="110"></use> </svg> </a> </div> </div> </div> </nav> </div> </div> </header> <main id="page" class="container" role="main"> <article class="sections" id="sections" data-page-sections="63ce12ca4c8b242c83b6903c"> <section data-test="page-section" data-section-theme="black" class='page-section content-collection full-bleed-section collection-type-blog-single-column background-width--full-bleed section-height--medium content-width--wide horizontal-alignment--center vertical-alignment--middle black' data-section-id="63ce12ca4c8b242c83b6903e" data-controller="SectionWrapperController" data-current-styles="{ "imageOverlayOpacity": 0.15, "backgroundWidth": "background-width--full-bleed", "sectionHeight": "section-height--medium", "horizontalAlignment": "horizontal-alignment--center", "verticalAlignment": "vertical-alignment--middle", "contentWidth": "content-width--wide", "sectionTheme": "black", "sectionAnimation": "none", "backgroundMode": "image" }" data-current-context="{ "video": { "playbackSpeed": 0.5, "filter": 1, "filterStrength": 0, "zoom": 0, "videoSourceProvider": "none" }, "backgroundImageId": null, "backgroundMediaEffect": null, "divider": null, "typeName": "blog-single-column" }" data-animation="none" > <div 
class="section-border" > <div class="section-background"> </div> </div> <div class='content-wrapper' style=' ' > <div class="content" > <div class="blog-single-column collection-content-wrapper" data-controller="BlogImageLoader"> <div class="blog-single-column--wrapper"> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">12/02/2024</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">12/02/2024</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2402.07896" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Suppressing Pink Elephants with Direct Principle Feedback </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-65cb01285f5e1434ece9707e"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-3264c8191f96b745a88a"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Existing 
methods for controlling language models, such as RLHF and Constitutional AI, involve determining which LLM behaviors are desirable and training them into a language model. However, in many cases, it is desirable for LLMs to be controllable at <em>inference time</em>, so that they can be used in multiple contexts with diverse needs. We illustrate this with the <strong>Pink Elephant Problem</strong>: instructing an LLM to avoid discussing a certain entity (a ``Pink Elephant''), and instead discuss a preferred entity (``Grey Elephant''). We apply a novel simplification of Constitutional AI, <strong>Direct Principle Feedback</strong>, which skips the ranking of responses and uses DPO directly on critiques and revisions. Our results show that after DPF fine-tuning on our synthetic Pink Elephants dataset, our 13B fine-tuned LLaMA 2 model significantly outperforms Llama-2-13B-Chat and a prompted baseline, and performs as well as GPT-4 in on our curated test set assessing the Pink Elephant Problem.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2402.07896" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">06/02/2024</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time 
class="blog-date" pubdate data-animation-role="date">06/02/2024</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2402.04362" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Neural networks learn moments of increasing order </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-65cb00bc231ba54ead68a7ce"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-e8aaaf9deb13df53bcc0"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">The distributional simplicity bias (DSB) posits that neural networks learn low-order moments of the data distribution first, before moving on to higher-order correlations. In this work, we present compelling new evidence for the DSB by showing that networks automatically learn to perform well on maximum-entropy distributions whose low-order statistics match those of the training set early in training, then lose this ability later. We also extend the DSB to discrete domains by proving an equivalence between token <em>n</em>-gram frequencies and the moments of embedding vectors, and by finding empirical evidence for the bias in LLMs. Finally we use optimal transport methods to surgically edit the low-order statistics of one class to match those of another, and show that early-training networks treat the edited samples as if they were drawn from the target class. 
Code is available at <a href="https://github.com/EleutherAI/features-across-time">this https URL</a>.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2402.04362" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/10/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/10/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2310.15154" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Linear Representations of Sentiment in Large Language Models </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6537fc4aa475a54f8ff74c4e"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-caa0cd863f1be1e10036"><div class="sqs-block-content"> <div 
class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Sentiment is a pervasive feature in natural language text, yet it is an open question how sentiment is represented within Large Language Models (LLMs). In this study, we reveal that across a range of models, sentiment is represented linearly: a single direction in activation space mostly captures the feature across a range of tasks with one extreme for positive and the other for negative. Through causal interventions, we isolate this direction and show it is causally relevant in both toy tasks and real world datasets such as Stanford Sentiment Treebank. Through this case study we model a thorough investigation of what a single direction means on a broad data distribution. </p><p class="" style="white-space:pre-wrap;">We further uncover the mechanisms that involve this direction, highlighting the roles of a small subset of attention heads and neurons. Finally, we discover a phenomenon which we term the summarization motif: sentiment is not solely represented on emotionally charged words, but is additionally summarized at intermediate positions without inherent sentiment, such as punctuation and names. 
We show that in Stanford Sentiment Treebank zero-shot classification, 76% of above-chance classification accuracy is lost when ablating the sentiment direction, nearly half of which (36%) is due to ablating the summarized sentiment direction exclusively at comma positions.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2310.15154" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">31/08/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">31/08/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2309.00071" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> YaRN: Efficient Context Window Extension of Large Language Models </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6501fc9b3ab3304103f24640"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" 
data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-045c31ad064e99310aba"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Rotary Position Embeddings (RoPE) have been shown to effectively encode positional information in transformer-based language models. However, these models fail to generalize past the sequence length they were trained on. We present YaRN (Yet another RoPE extensioN method), a compute-efficient method to extend the context window of such models, requiring 10x fewer tokens and 2.5x fewer training steps than previous methods. Using YaRN, we show that LLaMA models can effectively utilize and extrapolate to context lengths much longer than their original pre-training would allow, while also surpassing the previous state-of-the-art at context window extension. In addition, we demonstrate that YaRN exhibits the capability to extrapolate beyond the limited context of a fine-tuning dataset. 
We publish the checkpoints of Llama 2 7B/13B fine-tuned using YaRN with 64k and 128k context windows at <a href="https://github.com/jquesnelle/yarn">this https URL</a></p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2309.00071" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">30/06/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">30/06/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2306.17806" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Stay on topic with Classifier-Free Guidance </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-64a57a1c9d28df4f2cb48bad"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-c5b038205c2169b8e7ed"><div 
class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Classifier-Free Guidance (CFG) has recently emerged in text-to-image generation as a lightweight technique to encourage prompt-adherence in generations. In this work, we demonstrate that CFG can be used broadly as an inference-time technique in pure language modeling. We show that CFG (1) improves the performance of Pythia, GPT-2 and LLaMA-family models across an array of tasks: Q&A, reasoning, code generation, and machine translation, achieving SOTA on LAMBADA with LLaMA-7B over PaLM-540B; (2) brings improvements equivalent to a model with twice the parameter-count; (3) can stack alongside other inference-time methods like Chain-of-Thought and Self-Consistency, yielding further improvements in difficult tasks; (4) can be used to increase the faithfulness and coherence of assistants in challenging form-driven and content-driven prompts: in a human evaluation we show a 75% preference for GPT4All using CFG over baseline.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2306.17806" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">07/06/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella 
Biderman</span> <time class="blog-date" pubdate data-animation-role="date">07/06/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2306.02254" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> A Technical Report for Polyglot-Ko: Open-Source Large-Scale Korean Language Models </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-647ff3657f6d6b3544cb15d9"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-e0987a69e4fbce13bc5b"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Polyglot is a pioneering project aimed at enhancing the non-English language performance of multilingual language models. Despite the availability of various multilingual models such as mBERT (Devlin et al., 2019), XGLM (Lin et al., 2022), and BLOOM (Scao et al., 2022), researchers and developers often resort to building monolingual models in their respective languages due to the dissatisfaction with the current multilingual models non-English language capabilities. Addressing this gap, we seek to develop advanced multilingual language models that offer improved performance in non-English languages. In this paper, we introduce the Polyglot Korean models, which represent a specific focus rather than being multilingual in nature. In collaboration with TUNiB, our team collected 1.2TB of Korean data meticulously curated for our research journey. We made a deliberate decision to prioritize the development of Korean models before venturing into multilingual models. 
This choice was motivated by multiple factors: firstly, the Korean models facilitated performance comparisons with existing multilingual models; and finally, they catered to the specific needs of Korean companies and researchers. This paper presents our work in developing the Polyglot Korean models, which propose some steps towards addressing the non-English language performance gap in multilingual language models.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2306.02254" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">25/05/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">25/05/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2305.16367" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Role-Play with Large Language Models </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-64a82037a3fb062a79906f30"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block 
sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-3135a498e54e2ff14d12"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">As dialogue agents become increasingly human-like in their performance, it is imperative that we develop effective ways to describe their behaviour in high-level terms without falling into the trap of anthropomorphism. In this paper, we foreground the concept of role-play. Casting dialogue agent behaviour in terms of role-play allows us to draw on familiar folk psychological terms, without ascribing human characteristics to language models they in fact lack. Two important cases of dialogue agent behaviour are addressed this way, namely (apparent) deception and (apparent) self-awareness.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2305.16367" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/05/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/05/2023</time> </span> 
</div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2305.14699" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Can Transformers Learn to Solve Problems Recursively? </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-646e6a4263c5672c1c378d3f"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-b1c75114429edfcebf69"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Neural networks have in recent years shown promise for helping software engineers write programs and even formally verify them. While semantic information plays a crucial part in these processes, it remains unclear to what degree popular neural architectures like transformers are capable of modeling that information.</p><p class="" style="white-space:pre-wrap;">This paper examines the behavior of neural networks learning algorithms relevant to programs and formal verification proofs through the lens of mechanistic interpretability, focusing in particular on structural recursion. Structural recursion is at the heart of tasks on which symbolic tools currently outperform neural models, like inferring semantic relations between datatypes and emulating program behavior. </p><p class="" style="white-space:pre-wrap;">We evaluate the ability of transformer models to learn to emulate the behavior of structurally recursive functions from input-output examples. 
Our evaluation includes empirical and conceptual analyses of the limitations and capabilities of transformer models in approximating these functions, as well as reconstructions of the “shortcut” algorithms the model learns. By reconstructing these algorithms, we are able to <em>correctly predict</em> 91% of failure cases for one of the approximated functions. Our work provides a new foundation for understanding the behavior of neural networks that fail to solve the very tasks they are trained for.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2305.14699" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">04/05/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">04/05/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> StarCoder: May the Source be With You! 
</a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6453f7d2b4fac62861c7e0db"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-232066892569ec385803"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">The BigCode community, an open-scientific collaboration working on the responsible development of Large Language Models for Code (Code LLMs), introduces StarCoder and StarCoderBase: 15.5B parameter models with 8K context length, infilling capabilities and fast large-batch inference enabled by multi-query attention. StarCoderBase is trained on 1 trillion tokens sourced from The Stack (Kocetkov et al., 2022), a large collection of permissively licensed GitHub repositories with inspection tools and an opt-out process. We fine-tuned StarCoderBase on 35B Python tokens, resulting in the creation of StarCoder. We perform the most comprehensive evaluation of Code LLMs to date and show that StarCoderBase outperforms every open Code LLM that supports multiple programming languages and matches or outperforms the OpenAI code-cushman-001 model. Furthermore, StarCoder outperforms every model that is fine-tuned on Python, can be prompted to achieve 40% pass@1 on HumanEval, and still retains its performance on other programming languages. 
We take several important steps towards a safe open-access model release, including an improved PII redaction pipeline and a novel attribution tracing tool, and make the StarCoder models publicly available under a more commercially viable version of the Open Responsible AI Model license.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">02/03/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">02/03/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2303.08112" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Eliciting Latent Predictions from Transformers with the Tuned Lens </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-63e47f91024566490a4e7840"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" 
data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-ffe13f9f2f78a47ef6c6"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">We analyze transformers from the perspective of iterative inference, seeking to understand how model predictions are refined layer by layer. To do so, we train an affine probe for each block in a frozen pretrained model, making it possible to decode every hidden state into a distribution over the vocabulary. Our method, the <em>tuned lens</em>, is a refinement of the earlier “logit lens” technique, which yielded useful insights but is often brittle. </p><p class="" style="white-space:pre-wrap;">We test our method on various autoregressive language models with up to 20B parameters, showing it to be more predictive, reliable and unbiased than the logit lens. With causal experiments, we show the tuned lens uses similar features to the model itself. We also find the trajectory of latent predictions can be used to detect malicious inputs with high accuracy. 
All code needed to reproduce our results can be found <a href="https://github.com/AlignmentResearch/tuned-lens">here</a>.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2303.08112" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/02/2023</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/02/2023</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2302.12433" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> ProofNet: Autoformalizing and Formally Proving Undergraduate-Level Mathematics </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Azerbayev, Piotrowski, Schoelkopf, Ayers, Radev, and Avigad. "ProofNet: Autoformalizing and Formally Proving Undergraduate-Level Mathematics." 
<em>arXiv preprint arXiv:2302.12433</em> (2023).</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6400466b3c3ea70cfb60043f"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-e318c8c5f54b308dd40b"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">We introduce ProofNet, a benchmark for autoformalization and formal proving of undergraduate-level mathematics. The ProofNet benchmark consists of 371 examples, each consisting of a formal theorem statement in Lean 3, a natural language theorem statement, and a natural language proof. The problems are primarily drawn from popular undergraduate pure mathematics textbooks and cover topics such as real and complex analysis, linear algebra, abstract algebra, and topology. We intend for ProofNet to be a challenging benchmark that will drive progress in autoformalization and automatic theorem proving. We report baseline results on statement autoformalization via in-context learning. 
Moreover, we introduce two novel statement autoformalization methods: prompt retrieval and distilled backtranslation.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2302.12433" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">19/12/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">19/12/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2212.09535" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> BLOOM+1: Adding Language Support to BLOOM for Zero-Shot Prompting </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Yong, Schoelkopf, Muennighoff, et al. "BLOOM+1: Adding Language Support to BLOOM for Zero-Shot Prompting." 
<em>arXiv preprint arXiv:2212.09535</em> (2022).</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-63dd75b4a42fb31c377915de"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-9e001d9405161747a710"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">The BLOOM model is a large open-source multilingual language model capable of zero-shot learning, but its pretraining was limited to 46 languages. To improve its zero-shot performance on unseen languages, it is desirable to adapt BLOOM, but previous works have only explored adapting small language models. In this work, we apply existing language adaptation strategies to BLOOM and benchmark its zero-shot prompting performance on eight new languages. We find language adaptation to be effective at improving zero-shot performance in new languages. Surprisingly, adapter-based finetuning is more effective than continued pretraining for large models. In addition, we discover that prompting performance is not significantly affected by language specifics, such as the writing system. It is primarily determined by the size of the language adaptation data. We also add new languages to BLOOMZ, which is a multitask finetuned version of BLOOM capable of following task instructions zero-shot. We find including a new language in the multitask fine-tuning mixture to be the most effective method to teach BLOOMZ a new language. We conclude that with sufficient training data language adaptation can generalize well to diverse languages. 
Our code is available at <a href="https://github.com/bigscience-workshop/multilingual-modeling/">this URL</a>.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2212.09535" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/11/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">24/11/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2211.12737" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> RoentGen: Vision-Language Foundation Model for Chest X-ray Generation </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Pierre Chambon, Christian Bluethgen, Jean-Benoit Delbrouck, Rogier Van der Sluijs, Małgorzata Połacin, Juan Manuel Zambrano Chaves, Tanishq Mathew Abraham, Shivanshu Purohit, Curtis P. Langlotz, Akshay Chaudhari. "RoentGen: Vision-Language Foundation Model for Chest X-ray Generation." 
arXiv preprint arXiv:2211.12737 (2022)</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-639a8a5cc5f8f879a2e9a2f2"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-b74b146d66e4d05e57fc"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Multimodal models trained on large natural image-text pair datasets have exhibited astounding abilities in generating high-quality images. Medical imaging data is fundamentally different to natural images, and the language used to succinctly capture relevant details in medical data uses a different, narrow but semantically rich, domain-specific vocabulary. Not surprisingly, multi-modal models trained on natural image-text pairs do not tend to generalize well to the medical domain. Developing generative imaging models faithfully representing medical concepts while providing compositional diversity could mitigate the existing paucity of high-quality, annotated medical imaging datasets. In this work, we develop a strategy to overcome the large natural-medical distributional shift by adapting a pre-trained latent diffusion model on a corpus of publicly available chest x-rays (CXR) and their corresponding radiology (text) reports. We investigate the model's ability to generate high-fidelity, diverse synthetic CXR conditioned on text prompts. We assess the model outputs quantitatively using image quality metrics, and evaluate image quality and text-image alignment by human domain experts. 
We present evidence that the resulting model (RoentGen) is able to create visually convincing, diverse synthetic CXR images, and that the output can be controlled to a new extent by using free-form text prompts including radiology-specific language. Fine-tuning this model on a fixed training set and using it as a data augmentation method, we measure a 5% improvement of a classifier trained jointly on synthetic and real images, and a 3% improvement when trained on a larger but purely synthetic training set. Finally, we observe that this fine-tuning distills in-domain knowledge in the text-encoder and can improve its representation capabilities of certain diseases like pneumothorax by 25%.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2211.12737" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">10/11/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">10/11/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2211.05100" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> BLOOM: A 176B-Parameter Open-Access Multilingual Language Model </a> </h1> <div 
class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Le Scao, et al. (incl. Tow, Biderman, Ammanamanchi, Gao, Sutawika, Teehan). "BLOOM: A 176B-Parameter Open-Access Multilingual Language Model." arXiv preprint arXiv: 2211.05100, 2022.</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-639a87dbe2e5bd05e38c2bc4"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-5486ae78d6e0de6dc583"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Large language models (LLMs) have been shown to be able to perform new tasks based on a few demonstrations or natural language instructions. While these capabilities have led to widespread adoption, most LLMs are developed by resource-rich organizations and are frequently kept from the public. As a step towards democratizing this powerful technology, we present BLOOM, a 176B-parameter open-access language model designed and built thanks to a collaboration of hundreds of researchers. BLOOM is a decoder-only Transformer language model that was trained on the ROOTS corpus, a dataset comprising hundreds of sources in 46 natural and 13 programming languages (59 in total). We find that BLOOM achieves competitive performance on a wide variety of benchmarks, with stronger results after undergoing multitask prompted finetuning. 
To facilitate future research and applications using LLMs, we publicly release our models and code under the Responsible AI License.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2211.05100" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">03/11/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">03/11/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2211.01786" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Crosslingual Generalization through Multitask Finetuning </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Muennighoff, et al. (incl. Sutawika, Biderman, and Schoelkopf). "Crosslingual Generalization through Multitask Finetuning." 
arXiv preprint arXiv:2211.01786, 2022.</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-63a4805b14e37d1742619326"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-975e9430421f44321482"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Multitask prompted finetuning (MTF) has been shown to help large language models generalize to new tasks in a zero-shot setting, but so far explorations of MTF have focused on English data and models. We apply MTF to the pretrained multilingual BLOOM and mT5 model families to produce finetuned variants called BLOOMZ and mT0. We find finetuning large multilingual language models on English tasks with English prompts allows for task generalization to non-English languages that appear only in the pretraining corpus. Finetuning on multilingual tasks with English prompts further improves performance on English and non-English tasks leading to various state-of-the-art zero-shot results. We also investigate finetuning on multilingual tasks with prompts that have been machine-translated from English to match the language of each dataset. We find training on these machine-translated prompts leads to better performance on human-written prompts in the respective languages. Surprisingly, we find models are capable of zero-shot generalization to tasks in languages they have never intentionally seen. We conjecture that the models are learning higher-level capabilities that are both task- and language-agnostic. In addition, we introduce xP3, a composite of supervised datasets in 46 languages with English and machine-translated prompts. 
Our code, datasets and models are publicly available at <a href="https://github.com/bigscience-workshop/xmtf">this URL</a>.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2211.01786" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">15/10/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">15/10/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2210.07792" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Robust Preference Learning for Storytelling via Contrastive Reinforcement Learning </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-651e2404962f662b07f317b3"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{&quot;topLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;topRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0}}" id="block-70d2cf242436603736b9"><div 
class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Controlled automated story generation seeks to generate natural language stories satisfying constraints from natural language critiques or preferences. Existing methods to control for story preference utilize prompt engineering which is labor intensive and often inconsistent. They may also use logit-manipulation methods which require annotated datasets to exist for the desired attributes. To address these issues, we first train a contrastive bi-encoder model to align stories with corresponding human critiques, named CARP, building a general purpose preference model. This is subsequently used as a reward function to fine-tune a generative language model via reinforcement learning. However, simply fine-tuning a generative language model with a contrastive reward model does not always reliably result in a story generation system capable of generating stories that meet user preferences. To increase story generation robustness we further fine-tune the contrastive reward model using a prompt-learning technique. A human participant study is then conducted comparing generations from our full system, ablations, and two baselines. We show that the full fine-tuning pipeline results in a story generator preferred over a LLM 20x as large as well as logit-based methods. 
This motivates the use of contrastive learning for general purpose human preference modeling.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2210.07792" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">09/06/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">09/06/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2206.04615" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Srivastava, Aarohi, et al. (incl. Phang, Gao, and Biderman). "Beyond the Imitation Game: Quantifying and extrapolating the capabilities of language models." 
arXiv preprint arXiv:2206.04615, 2022.</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-639a7d337cecc963fd8539b1"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-c574de130d07d0b13e57"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Language models demonstrate both quantitative improvement and new qualitative capabilities with increasing scale. Despite their potentially transformative impact, these new capabilities are as yet poorly characterized. In order to inform future research, prepare for disruptive new model capabilities, and ameliorate socially harmful effects, it is vital that we understand the present and near-future capabilities and limitations of language models. To address this challenge, we introduce the Beyond the Imitation Game benchmark (BIG-bench). BIG-bench currently consists of 204 tasks, contributed by 442 authors across 132 institutions. Task topics are diverse, drawing problems from linguistics, childhood development, math, common-sense reasoning, biology, physics, social bias, software development, and beyond. BIG-bench focuses on tasks that are believed to be beyond the capabilities of current language models. We evaluate the behavior of OpenAI's GPT models, Google-internal dense transformer architectures, and Switch-style sparse transformers on BIG-bench, across model sizes spanning millions to hundreds of billions of parameters. In addition, a team of human expert raters performed all tasks in order to provide a strong baseline. 
Findings include: model performance and calibration both improve with scale, but are poor in absolute terms (and when compared with rater performance); performance is remarkably similar across model classes, though with benefits from sparsity; tasks that improve gradually and predictably commonly involve a large knowledge or memorization component, whereas tasks that exhibit "breakthrough" behavior at a critical scale often involve multiple steps or components, or brittle metrics; social bias typically increases with scale in settings with ambiguous context, but this can be improved with prompting.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2206.04615" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">25/01/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">25/01/2022</time> </span> </div> <h1 class="blog-title"> <a href="/papers-blog/documenting-geographically-and-contextually-diverse-data-sources" data-no-animation> Documenting geographically and contextually diverse data sources: The bigscience catalogue of language data and resources </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p 
class="" style="white-space:pre-wrap;">McMillan-Major, Alyafeai, Biderman, et al. "Documenting Geographically and Contextually Diverse Data Sources: The BigScience Catalogue of Language Data and Resources." arXiv preprint arXiv:2201.10066, 2022.</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6399b52980800648445cfc7f"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-18d4ee3f5c0929c14f9b"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">In recent years, large-scale data collection efforts have prioritized the amount of data collected in order to improve the modeling capabilities of large language models. This prioritization, however, has resulted in concerns with respect to the rights of data subjects represented in data collections, particularly when considering the difficulty in interrogating these collections due to insufficient documentation and tools for analysis. Mindful of these pitfalls, we present our methodology for a documentation-first, human-centered data collection project as part of the BigScience initiative. We identified a geographically diverse set of target language groups (Arabic, Basque, Chinese, Catalan, English, French, Indic languages, Indonesian, Niger-Congo languages, Portuguese, Spanish, and Vietnamese, as well as programming languages) for which to collect metadata on potential data sources. To structure this effort, we developed our online catalogue as a supporting tool for gathering metadata through organized public hackathons. 
We present our development process; analyses of the resulting resource metadata, including distributions over languages, regions, and resource types; and our lessons learned in this endeavor.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link" href="/papers-blog/documenting-geographically-and-contextually-diverse-data-sources" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">13/01/2022</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">13/01/2022</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2201.07311" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> Datasheet for the Pile </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Stella Biderman, Kieran Bicheno, and Leo Gao. 
“Datasheet for the Pile.” Preprint, 2022.</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6399b40d8162eb4c6daf25ff"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{&quot;topLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;topRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0}}" id="block-4708c477aa6b4f34b414"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">This datasheet describes the Pile, an 825 GiB dataset of human-authored text compiled by EleutherAI for use in large-scale language modeling. The Pile is comprised of 22 different text sources, ranging from original scrapes done for this project, to text data made available by the data owners, to third-party scrapes available online.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2201.07311" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> <article class="blog-single-column--container entry blog-item no-image-fade-in"> <div class="blog-single-column--text"> <div class="blog-meta-section"> <span class="blog-meta-primary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate data-animation-role="date">03/11/2021</time> </span> <span class="blog-meta-delimiter"></span> <span class="blog-meta-delimiter blog-category-delimiter"></span> <span class="blog-meta-secondary"> <span class="blog-categories-list"> <a href="/papers-blog/category/arXiv" class="blog-categories">arXiv</a> </span> <span class="blog-author">Stella Biderman</span> <time class="blog-date" pubdate 
data-animation-role="date">03/11/2021</time> </span> </div> <h1 class="blog-title"> <a href="https://arxiv.org/abs/2111.02114" class="passthrough-link" target="_blank" rel="noopener" data-no-animation> LAION-400M: Open Dataset of CLIP-Filtered 400 Million Image-Text Pairs </a> </h1> <div class="blog-excerpt"> <div class="blog-excerpt-wrapper"><p class="" style="white-space:pre-wrap;">Christoph Schuhmann, Richard Vencu, Romain Beaumont, Robert Kaczmarczyk, Clayton Mullis, Aarush Katta, Theo Coombes, Jenia Jitsev, Aran Komatsuzaki. "LAION-400M: Open Dataset of CLIP-Filtered 400 Million Image-Text Pairs." arXiv preprint arXiv: 2111.02114, 2021</p></div> <div class="blog-body-wrapper"><div class="sqs-layout sqs-grid-12 columns-12" data-layout-label="Post Body" data-type="item" id="item-6398ad51e9388305b446f26b"><div class="row sqs-row"><div class="col sqs-col-12 span-12"><div class="sqs-block html-block sqs-block-html" data-block-type="2" data-border-radii="{"topLeft":{"unit":"px","value":0.0},"topRight":{"unit":"px","value":0.0},"bottomLeft":{"unit":"px","value":0.0},"bottomRight":{"unit":"px","value":0.0}}" id="block-b1c4866b971c46c3d3fc"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="" style="white-space:pre-wrap;">Multi-modal language-vision models trained on hundreds of millions of image-text pairs (e.g. CLIP, DALL-E) gained a recent surge, showing remarkable capability to perform zero- or few-shot learning and transfer even in absence of per-sample labels on target image data. Despite this trend, to date there has been no publicly available datasets of sufficient scale for training such models from scratch. 
To address this issue, in a community effort we build and release for public LAION-400M, a dataset with CLIP-filtered 400 million image-text pairs, their CLIP embeddings and kNN indices that allow efficient similarity search.</p> </div> </div></div></div></div></div></div> </div> <a class="blog-more-link passthrough-link" href="https://arxiv.org/abs/2111.02114" target="_blank" rel="noopener" data-animation-role="content">Read More</a> </div> </article> </div> <nav class="blog-list-pagination"> <div class="newer"> </div> <div class="older"> <a href="/papers-blog?offset=1635954600995&category=arXiv" rel="next"> <span class="next-label">Older Posts</span> <div class="blog-list-pagination-icon icon icon--stroke"> <svg class="caret-right-icon--small" viewBox="0 0 9 16"> <polyline fill="none" stroke-miterlimit="10" points="1.6,1.2 6.5,7.9 1.6,14.7 "/> </svg> </div> </a> </div> </nav> </div> </div> </div> </section> </article> <section id="itemPagination" class="item-pagination item-pagination--prev-next" data-collection-type="blog-single-column" > </section> </main> <footer class="sections" id="footer-sections" data-footer-sections> <section data-test="page-section" data-section-theme="black" class='page-section full-bleed-section layout-engine-section background-width--full-bleed section-height--small content-width--wide horizontal-alignment--center vertical-alignment--middle black' data-section-id="6343e7df9a7c4b05ef290bfe" data-controller="SectionWrapperController" data-current-styles="{ "imageOverlayOpacity": 0.15, "backgroundWidth": "background-width--full-bleed", "sectionHeight": "section-height--small", "customSectionHeight": 1, "horizontalAlignment": "horizontal-alignment--center", "verticalAlignment": "vertical-alignment--middle", "contentWidth": "content-width--wide", "customContentWidth": 50, "sectionTheme": "black", "sectionAnimation": "none", "backgroundMode": "image" }" data-current-context="{ "video": { "playbackSpeed": 0.5, "filter": 1, "filterStrength": 
0, "zoom": 0, "videoSourceProvider": "none" }, "backgroundImageId": null, "backgroundMediaEffect": { "type": "none" }, "divider": null, "typeName": "blog-single-column" }" data-animation="none" data-fluid-engine-section > <div class="section-border" > <div class="section-background"> </div> </div> <div class='content-wrapper' style=' ' > <div class="content" > <div data-fluid-engine="true"><style> .fe-6343e7df9a7c4b05ef290bfd { --grid-gutter: calc(var(--sqs-mobile-site-gutter, 6vw) - 0.0px); --cell-max-width: calc( ( var(--sqs-site-max-width, 1500px) - (0.0px * (8 - 1)) ) / 8 ); display: grid; position: relative; grid-area: 1/1/-1/-1; grid-template-rows: repeat(16,minmax(24px, auto)); grid-template-columns: minmax(var(--grid-gutter), 1fr) repeat(8, minmax(0, var(--cell-max-width))) minmax(var(--grid-gutter), 1fr); row-gap: 0.0px; column-gap: 0.0px; } @media (min-width: 768px) { .background-width--inset .fe-6343e7df9a7c4b05ef290bfd { --inset-padding: calc(var(--sqs-site-gutter) * 2); } .fe-6343e7df9a7c4b05ef290bfd { --grid-gutter: calc(var(--sqs-site-gutter, 4vw) - 0.0px); --cell-max-width: calc( ( var(--sqs-site-max-width, 1500px) - (0.0px * (24 - 1)) ) / 24 ); --inset-padding: 0vw; --row-height-scaling-factor: 0.0215; --container-width: min(var(--sqs-site-max-width, 1500px), calc(100vw - var(--sqs-site-gutter, 4vw) * 2 - var(--inset-padding) )); grid-template-rows: repeat(10,minmax(calc(var(--container-width) * var(--row-height-scaling-factor)), auto)); grid-template-columns: minmax(var(--grid-gutter), 1fr) repeat(24, minmax(0, var(--cell-max-width))) minmax(var(--grid-gutter), 1fr); } } .fe-block-yui_3_17_2_1_1665394661004_32043 { grid-area: 2/2/8/10; z-index: 2; @media (max-width: 767px) { } } .fe-block-yui_3_17_2_1_1665394661004_32043 .sqs-block { justify-content: flex-start; } .fe-block-yui_3_17_2_1_1665394661004_32043 .sqs-block-alignment-wrapper { align-items: flex-start; } @media (min-width: 768px) { .fe-block-yui_3_17_2_1_1665394661004_32043 { grid-area: 
1/3/10/8; z-index: 2; } .fe-block-yui_3_17_2_1_1665394661004_32043 .sqs-block { justify-content: center; } .fe-block-yui_3_17_2_1_1665394661004_32043 .sqs-block-alignment-wrapper { align-items: center; } } .fe-block-yui_3_17_2_1_1667820856499_112353 { grid-area: 10/2/12/10; z-index: 5; @media (max-width: 767px) { } } .fe-block-yui_3_17_2_1_1667820856499_112353 .sqs-block { justify-content: flex-start; } .fe-block-yui_3_17_2_1_1667820856499_112353 .sqs-block-alignment-wrapper { align-items: flex-start; } @media (min-width: 768px) { .fe-block-yui_3_17_2_1_1667820856499_112353 { grid-area: 4/11/6/17; z-index: 5; } .fe-block-yui_3_17_2_1_1667820856499_112353 .sqs-block { justify-content: center; } .fe-block-yui_3_17_2_1_1667820856499_112353 .sqs-block-alignment-wrapper { align-items: center; } } .fe-block-yui_3_17_2_1_1667226363828_506739 { grid-area: 12/2/14/10; z-index: 4; @media (max-width: 767px) { } } .fe-block-yui_3_17_2_1_1667226363828_506739 .sqs-block { justify-content: flex-start; } .fe-block-yui_3_17_2_1_1667226363828_506739 .sqs-block-alignment-wrapper { align-items: flex-start; } @media (min-width: 768px) { .fe-block-yui_3_17_2_1_1667226363828_506739 { grid-area: 6/11/8/17; z-index: 4; } .fe-block-yui_3_17_2_1_1667226363828_506739 .sqs-block { justify-content: center; } .fe-block-yui_3_17_2_1_1667226363828_506739 .sqs-block-alignment-wrapper { align-items: center; } } .fe-block-yui_3_17_2_1_1667226363828_491113 { grid-area: 15/2/17/10; z-index: 3; @media (max-width: 767px) { } } .fe-block-yui_3_17_2_1_1667226363828_491113 .sqs-block { justify-content: flex-start; } .fe-block-yui_3_17_2_1_1667226363828_491113 .sqs-block-alignment-wrapper { align-items: flex-start; } @media (min-width: 768px) { .fe-block-yui_3_17_2_1_1667226363828_491113 { grid-area: 10/11/11/17; z-index: 3; } .fe-block-yui_3_17_2_1_1667226363828_491113 .sqs-block { justify-content: flex-end; } .fe-block-yui_3_17_2_1_1667226363828_491113 .sqs-block-alignment-wrapper { align-items: flex-end; 
} } </style>
<!-- ============================================================
     Page footer: fluid-engine grid with navigation links, contact
     email, social account links, and copyright notice.
     Fixes vs. the generated markup:
       * data-border-radii JSON re-escaped with &quot; — raw double
         quotes inside a double-quoted attribute are invalid HTML and
         truncate the attribute value at the first brace
       * rel="noopener noreferrer" added to target="_blank" links
     ============================================================ -->
<div class="fluid-engine fe-6343e7df9a7c4b05ef290bfd">
<!-- Footer navigation column -->
<div class="fe-block fe-block-yui_3_17_2_1_1665394661004_32043"><div class="sqs-block html-block sqs-block-html" data-blend-mode="NORMAL" data-block-type="2" data-border-radii="{&quot;topLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;topRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0}}" id="block-yui_3_17_2_1_1665394661004_32043"><div class="sqs-block-content"> <div class="sqs-html-content"> <p class="sqsrte-small" style="white-space:pre-wrap;"><a href="/about">About</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"><a href="/research">Research</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"> <a href="/language-modeling">Language Modeling</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"> <a href="/interpretability">Interpretability</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"> <a href="/alignment">Alignment</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"> <a href="/other-modalities">Other Modalities</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"><a href="/releases">Releases</a></p><p class="sqsrte-small" style="white-space:pre-wrap;"><a href="https://blog.eleuther.ai">Blog</a></p> </div> </div></div></div>
<!-- Contact email -->
<div class="fe-block fe-block-yui_3_17_2_1_1667820856499_112353"><div class="sqs-block html-block sqs-block-html" data-blend-mode="NORMAL" data-block-type="2" data-border-radii="{&quot;topLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;topRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0}}" id="block-yui_3_17_2_1_1667820856499_112353"><div class="sqs-block-content"> <div class="sqs-html-content"> <p style="text-align:center;white-space:pre-wrap;" class="sqsrte-large">contact@eleuther.ai</p> </div> </div></div></div>
<!-- Social account links: email, Discord, GitHub, Twitter (icons come from the
     hidden SVG sprite at the bottom of the page). -->
<div class="fe-block fe-block-yui_3_17_2_1_1667226363828_506739"><div class="sqs-block socialaccountlinks-v2-block sqs-block-socialaccountlinks-v2" data-block-type="54" id="block-yui_3_17_2_1_1667226363828_506739"><div class="sqs-block-content"> <div class="sqs-svg-icon--outer social-icon-alignment-center social-icons-color- social-icons-size-medium social-icons-style-regular " > <style> #block-yui_3_17_2_1_1667226363828_506739 .social-icons-style-border .sqs-svg-icon--wrapper { box-shadow: 0 0 0 2px inset; border: none; } </style> <nav class="sqs-svg-icon--list"> <a href="mailto:contact@eleuther.ai" target="_blank" rel="noopener noreferrer" class="sqs-svg-icon--wrapper email" aria-label="contact@eleuther.ai"> <div> <svg class="sqs-svg-icon--social" viewBox="0 0 64 64"> <use class="sqs-use--icon" xlink:href="#email-icon"></use> <use class="sqs-use--mask" xlink:href="#email-mask"></use> </svg> </div> </a><a href="https://discord.gg/zBGx3azzUn" target="_blank" rel="noopener noreferrer" class="sqs-svg-icon--wrapper discord-unauth" aria-label="Discord"> <div> <svg class="sqs-svg-icon--social" viewBox="0 0 64 64"> <use class="sqs-use--icon" xlink:href="#discord-unauth-icon"></use> <use class="sqs-use--mask" xlink:href="#discord-unauth-mask"></use> </svg> </div> </a><a href="https://github.com/EleutherAI" target="_blank" rel="noopener noreferrer" class="sqs-svg-icon--wrapper github-unauth" aria-label="GitHub"> <div> <svg class="sqs-svg-icon--social" viewBox="0 0 64 64"> <use class="sqs-use--icon" xlink:href="#github-unauth-icon"></use> <use class="sqs-use--mask" xlink:href="#github-unauth-mask"></use> </svg> </div> </a><a href="https://twitter.com/AiEleuther" target="_blank" rel="noopener noreferrer" class="sqs-svg-icon--wrapper twitter-unauth" aria-label="Twitter"> <div> <svg class="sqs-svg-icon--social" viewBox="0 0 64 64"> <use class="sqs-use--icon" xlink:href="#twitter-unauth-icon"></use> <use class="sqs-use--mask" xlink:href="#twitter-unauth-mask"></use> </svg> </div> </a> </nav> </div> </div></div></div>
<!-- Copyright; the trailing close tags end the footer section opened earlier in the file. -->
<div class="fe-block fe-block-yui_3_17_2_1_1667226363828_491113"><div class="sqs-block html-block sqs-block-html" data-blend-mode="NORMAL" data-block-type="2" data-border-radii="{&quot;topLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;topRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomLeft&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0},&quot;bottomRight&quot;:{&quot;unit&quot;:&quot;px&quot;,&quot;value&quot;:0.0}}" id="block-yui_3_17_2_1_1667226363828_491113"><div class="sqs-block-content"> <div class="sqs-html-content"> <p style="text-align:center;white-space:pre-wrap;" class="sqsrte-small">Copyright EleutherAI 2023</p> </div> </div></div></div></div></div> </div> </div> </section> </footer> </div>
<!-- Squarespace site bundle. `defer` is a boolean attribute (was defer="true"). -->
<script defer src="https://static1.squarespace.com/static/vta/5c5a519771c10ba3470d8101/scripts/site-bundle.7a52a694fd4248c18775c6aa813a0358.js"></script>
<!-- Lazy Summaries plugin (squarewebsites.org). The inline loader below injects
     the plugin's admin script once, only when this window is embedded in the
     Squarespace editor (window.self !== window.top) and the script is not
     already present in the top document's head. Third-party minified code —
     kept verbatim. -->
<script src="https://assets.squarewebsites.org/lazy-summaries/lazy-summaries.min.js"></script>
<script> !function(){window.self===window.top||window.top.document.getElementById("lazy-summaries-admin")||function(e,t,s,i,a){if(s.querySelector("#"+t))i&&i(this);else{var n=document.createElement("script");n.src=e+"?cache="+((new Date).getTime()+"").substr(0,8),n.id=t,n.onload=function(){a&&this.remove(),i&&i(this)},s.appendChild(n)}}("https://assets.squarewebsites.org/lazy-summaries/lazy-summaries-admin.js","lazy-summaries-admin",window.top.document.getElementsByTagName("head")[0])}(); </script>
<!-- Re-typset page in case it is dynamic, per https://stackoverflow.com/questions/25839396/is-it-possible-to-use-mathjax-on-squarespace -->
<!-- Guarded so pages loaded without MathJax do not throw a ReferenceError. -->
<script>
if (window.MathJax && MathJax.Hub) { MathJax.Hub.Queue(["Typeset", MathJax.Hub]); }
</script>
<!-- Turn Squarespace "folder" nav titles into real links on desktop and add an
     "All <section>" entry to each mobile folder menu.
     Fixes: removed a stray debug console.log() (a SyntaxError once the source
     was collapsed onto one line), and the injected wrapper now closes with
     </div> instead of a second opening <div>. -->
<script>
(function () {
  let folders;

  // Clone a desktop folder title as a plain anchor pointing at the folder's
  // landing page: strips the "-folder/" marker and, for absolute hrefs,
  // everything up to and including ".com".
  function rebuildAnchor(folder) {
    let parent = folder.closest('.header-nav-item--folder'),
        href = folder.href.includes('.com')
          ? folder.href.split('.com')[1].replace("-folder/", "")
          : folder.href.replace("-folder/", ""),
        anchorClone = folder.cloneNode(true);
    anchorClone.classList.add('clickable-folder');
    anchorClone.setAttribute('href', href);
    anchorClone.style.cssText = ` opacity: 1; transform: unset; `;
    parent.insertAdjacentElement('afterbegin', anchorClone);
    if (href == window.location.pathname) {
      anchorClone.closest('.header-nav-item--folder').classList.add('header-nav-item--active');
    }
  }

  // Insert an "All <title>" link into the matching mobile folder menu,
  // right after its back button.
  function addToMobile(folder) {
    let href = folder.getAttribute("href"),
        hrefAdjusted = href.includes('.com')
          ? href.split('.com')[1].replace("-folder/", "")
          : href.replace("-folder/", ""),
        text = folder.innerText,
        newText = `All ${text}`,
        mobileFolder = document.querySelector(`[data-folder="${href}"]`),
        backButton = mobileFolder.querySelector(".header-menu-nav-folder-content > *:first-of-type"),
        allButton = `<div class="container header-menu-nav-item header-menu-nav-item--external"> <a href="${hrefAdjusted}">${newText}</a> </div>`;
    backButton.insertAdjacentHTML('afterend', allButton);
  }

  /* Select All Folder Links & */
  // Once the page has loaded, replace each desktop folder title with its
  // clickable equivalent and mirror it into the mobile menu.
  function setFolderLinks() {
    folders = document.querySelectorAll('.header-display-desktop .header-nav-folder-title[href*="-folder/"]');
    for (let folder of folders) {
      window.addEventListener('load', function () {
        addToMobile(folder);
        rebuildAnchor(folder);
        folder.remove();
      });
    }
  }

  setFolderLinks();
})();
</script>
<!-- Hidden SVG sprite: social icon symbols referenced above via xlink:href. -->
<svg xmlns="http://www.w3.org/2000/svg" version="1.1" style="display:none" data-usage="social-icons-svg"><symbol id="email-icon" viewBox="0 0 64 64"><path d="M17,22v20h30V22H17z M41.1,25L32,32.1L22.9,25H41.1z M20,39V26.6l12,9.3l12-9.3V39H20z"/></symbol><symbol id="email-mask" viewBox="0 0 64 64"><path d="M41.1,25H22.9l9.1,7.1L41.1,25z M44,26.6l-12,9.3l-12-9.3V39h24V26.6z M0,0v64h64V0H0z M47,42H17V22h30V42z"/></symbol><symbol id="discord-unauth-icon" viewBox="0 0 64 64"><path d="M42.3963 22.8955C40.4842 22.0182 38.4337 21.3718 36.2899 21.0016C36.2508 20.9944 36.2118 21.0123
36.1917 21.048C35.928 21.517 35.6359 22.1289 35.4314 22.6098C33.1255 22.2646 30.8315 22.2646 28.5729 22.6098C28.3683 22.1182 28.0656 21.517 27.8007 21.048C27.7806 21.0135 27.7416 20.9956 27.7026 21.0016C25.5599 21.3706 23.5095 22.017 21.5962 22.8955C21.5796 22.9027 21.5654 22.9146 21.556 22.93C17.6667 28.7405 16.6013 34.4081 17.124 40.0055C17.1263 40.0329 17.1417 40.0591 17.163 40.0757C19.729 41.9601 22.2146 43.1041 24.6541 43.8624C24.6931 43.8743 24.7345 43.8601 24.7594 43.8279C25.3364 43.0399 25.8508 42.209 26.2918 41.3352C26.3179 41.284 26.293 41.2233 26.2398 41.203C25.4239 40.8935 24.647 40.5162 23.8997 40.0876C23.8405 40.0531 23.8358 39.9686 23.8902 39.9281C24.0475 39.8102 24.2048 39.6876 24.3549 39.5638C24.3821 39.5412 24.42 39.5364 24.4519 39.5507C29.3616 41.7923 34.6769 41.7923 39.5287 39.5507C39.5606 39.5352 39.5985 39.54 39.6268 39.5626C39.7771 39.6864 39.9343 39.8102 40.0928 39.9281C40.1472 39.9686 40.1436 40.0531 40.0845 40.0876C39.3372 40.5245 38.5602 40.8935 37.7431 41.2019C37.6899 41.2221 37.6663 41.284 37.6923 41.3352C38.1428 42.2077 38.6572 43.0387 39.2236 43.8267C39.2473 43.8601 39.2898 43.8743 39.3289 43.8624C41.7802 43.1041 44.2658 41.9601 46.8318 40.0757C46.8543 40.0591 46.8685 40.0341 46.8708 40.0067C47.4964 33.5355 45.8231 27.9143 42.4353 22.9312C42.427 22.9146 42.4128 22.9027 42.3963 22.8955ZM27.025 36.5973C25.5469 36.5973 24.3289 35.2402 24.3289 33.5736C24.3289 31.907 25.5233 30.55 27.025 30.55C28.5386 30.55 29.7448 31.9189 29.7211 33.5736C29.7211 35.2402 28.5268 36.5973 27.025 36.5973ZM36.9934 36.5973C35.5153 36.5973 34.2974 35.2402 34.2974 33.5736C34.2974 31.907 35.4917 30.55 36.9934 30.55C38.507 30.55 39.7132 31.9189 39.6895 33.5736C39.6895 35.2402 38.507 36.5973 36.9934 36.5973Z" /></symbol><symbol id="discord-unauth-mask" viewBox="0 0 64 64"><path fill-rule="evenodd" clip-rule="evenodd" d="M64 0H0V64H64V0ZM36.23 20.0021C36.42 20.0021 40.8 20.3921 43.57 22.6221C45.07 24.0021 48 31.9321 47.98 38.7821C47.9812 38.9047 47.9501 39.0253 47.89
39.1321C45.87 42.6521 40.35 43.6521 39.09 43.6521C38.9798 43.6529 38.8709 43.628 38.772 43.5794C38.673 43.5308 38.5868 43.4598 38.52 43.3721L37.25 41.6221C39.1718 41.1872 40.9765 40.3412 42.54 39.1421C42.6637 39.0161 42.7351 38.8481 42.7401 38.6716C42.7451 38.4951 42.6832 38.3233 42.5669 38.1905C42.4506 38.0578 42.2884 37.9738 42.1128 37.9556C41.9372 37.9373 41.7612 37.9861 41.62 38.0921C41.59 38.1221 38.35 40.8721 32 40.8721C25.65 40.8721 22.36 38.0921 22.36 38.0921C22.2189 37.9861 22.0429 37.9373 21.8673 37.9556C21.6917 37.9738 21.5295 38.0578 21.4132 38.1905C21.2968 38.3233 21.2349 38.4951 21.2399 38.6716C21.2449 38.8481 21.3164 39.0161 21.44 39.1421C23.0035 40.3412 24.8083 41.1872 26.73 41.6221L25.46 43.3721C25.3933 43.4598 25.307 43.5308 25.2081 43.5794C25.1091 43.628 25.0003 43.6529 24.89 43.6521C23.63 43.6121 18.09 42.6821 16.09 39.1321C16.0299 39.0253 15.9989 38.9047 16 38.7821C16 31.9321 18.93 24.0021 20.4 22.6521C23.2 20.3921 27.58 20.0021 27.77 20.0021C27.9146 19.9908 28.0591 20.0252 28.1829 20.1006C28.3068 20.176 28.4037 20.2885 28.46 20.4221C28.5436 20.6103 28.6138 20.8041 28.67 21.0021C26.4824 21.3233 24.3773 22.0635 22.47 23.1821C22.3885 23.232 22.3178 23.2979 22.2623 23.3757C22.2068 23.4536 22.1676 23.5419 22.1471 23.6353C22.1265 23.7287 22.125 23.8252 22.1427 23.9192C22.1604 24.0132 22.1969 24.1026 22.25 24.1821C22.2981 24.2656 22.3628 24.3383 22.4402 24.3956C22.5176 24.4529 22.606 24.4936 22.6999 24.5152C22.7937 24.5368 22.891 24.5388 22.9857 24.5211C23.0804 24.5033 23.1704 24.4662 23.25 24.4121C25.9097 22.9176 28.9092 22.1326 31.96 22.1326C35.0108 22.1326 38.0103 22.9176 40.67 24.4121C40.7497 24.4662 40.8397 24.5033 40.9344 24.5211C41.029 24.5388 41.1263 24.5368 41.2202 24.5152C41.3141 24.4936 41.4025 24.4529 41.4799 24.3956C41.5573 24.3383 41.622 24.2656 41.67 24.1821C41.7231 24.1026 41.7596 24.0132 41.7773 23.9192C41.795 23.8252 41.7935 23.7287 41.773 23.6353C41.7525 23.5419 41.7133 23.4536 41.6578 23.3757C41.6023 23.2979 41.5316 23.232 41.45
23.1821C39.5679 22.0696 37.4907 21.3263 35.33 20.9921C35.3895 20.7952 35.4596 20.6015 35.54 20.4121C35.5991 20.2813 35.697 20.1718 35.8204 20.0984C35.9439 20.0251 36.0869 19.9915 36.23 20.0021ZM26.4843 35.7456C26.7961 35.8958 27.1345 35.983 27.48 36.0021C28.1619 35.9409 28.793 35.6163 29.2392 35.0972C29.6855 34.5781 29.9118 33.9054 29.87 33.2221C29.9141 32.5308 29.6836 31.85 29.2286 31.3276C28.7736 30.8052 28.1309 30.4834 27.44 30.4321C26.7474 30.4809 26.1021 30.8016 25.645 31.3243C25.1879 31.8469 24.9561 32.5292 25 33.2221C24.9798 33.5676 25.0279 33.9137 25.1416 34.2406C25.2554 34.5675 25.4326 34.8687 25.6629 35.1269C25.8933 35.3852 26.1724 35.5955 26.4843 35.7456ZM34.6952 35.1102C35.1499 35.6304 35.791 35.9508 36.48 36.0021H36.52C36.8656 35.983 37.204 35.8958 37.5158 35.7456C37.8276 35.5955 38.1067 35.3852 38.3371 35.1269C38.5675 34.8687 38.7447 34.5675 38.8584 34.2406C38.9722 33.9137 39.0203 33.5676 39 33.2221C39.0216 32.8726 38.9732 32.5223 38.8576 32.1917C38.742 31.8612 38.5615 31.5571 38.3268 31.2972C38.092 31.0373 37.8078 30.8269 37.4907 30.6784C37.1735 30.5298 36.8299 30.4461 36.48 30.4321C35.7892 30.4834 35.1465 30.8052 34.6915 31.3276C34.2365 31.85 34.006 32.5308 34.05 33.2221C34.0087 33.9118 34.2405 34.59 34.6952 35.1102Z" /></symbol><symbol id="github-unauth-icon" viewBox="0 0 64 64"><path d="M32,16c-8.8,0-16,7.2-16,16c0,7.1,4.6,13.1,10.9,15.2 c0.8,0.1,1.1-0.3,1.1-0.8c0-0.4,0-1.4,0-2.7c-4.5,1-5.4-2.1-5.4-2.1c-0.7-1.8-1.8-2.3-1.8-2.3c-1.5-1,0.1-1,0.1-1 c1.6,0.1,2.5,1.6,2.5,1.6c1.4,2.4,3.7,1.7,4.7,1.3c0.1-1,0.6-1.7,1-2.1c-3.6-0.4-7.3-1.8-7.3-7.9c0-1.7,0.6-3.2,1.6-4.3 c-0.2-0.4-0.7-2,0.2-4.2c0,0,1.3-0.4,4.4,1.6c1.3-0.4,2.6-0.5,4-0.5c1.4,0,2.7,0.2,4,0.5c3.1-2.1,4.4-1.6,4.4-1.6 c0.9,2.2,0.3,3.8,0.2,4.2c1,1.1,1.6,2.5,1.6,4.3c0,6.1-3.7,7.5-7.3,7.9c0.6,0.5,1.1,1.5,1.1,3c0,2.1,0,3.9,0,4.4 c0,0.4,0.3,0.9,1.1,0.8C43.4,45.1,48,39.1,48,32C48,23.2,40.8,16,32,16z"/></symbol><symbol id="github-unauth-mask" viewBox="0 0 64 64"><path d="M0,0v64h64V0H0z
M37.1,47.2c-0.8,0.2-1.1-0.3-1.1-0.8c0-0.5,0-2.3,0-4.4c0-1.5-0.5-2.5-1.1-3 c3.6-0.4,7.3-1.7,7.3-7.9c0-1.7-0.6-3.2-1.6-4.3c0.2-0.4,0.7-2-0.2-4.2c0,0-1.3-0.4-4.4,1.6c-1.3-0.4-2.6-0.5-4-0.5 c-1.4,0-2.7,0.2-4,0.5c-3.1-2.1-4.4-1.6-4.4-1.6c-0.9,2.2-0.3,3.8-0.2,4.2c-1,1.1-1.6,2.5-1.6,4.3c0,6.1,3.7,7.5,7.3,7.9 c-0.5,0.4-0.9,1.1-1,2.1c-0.9,0.4-3.2,1.1-4.7-1.3c0,0-0.8-1.5-2.5-1.6c0,0-1.6,0-0.1,1c0,0,1,0.5,1.8,2.3c0,0,0.9,3.1,5.4,2.1 c0,1.3,0,2.3,0,2.7c0,0.4-0.3,0.9-1.1,0.8C20.6,45.1,16,39.1,16,32c0-8.8,7.2-16,16-16c8.8,0,16,7.2,16,16 C48,39.1,43.4,45.1,37.1,47.2z"/></symbol><symbol id="twitter-unauth-icon" viewBox="0 0 64 64"><path d="M48,22.1c-1.2,0.5-2.4,0.9-3.8,1c1.4-0.8,2.4-2.1,2.9-3.6c-1.3,0.8-2.7,1.3-4.2,1.6 C41.7,19.8,40,19,38.2,19c-3.6,0-6.6,2.9-6.6,6.6c0,0.5,0.1,1,0.2,1.5c-5.5-0.3-10.3-2.9-13.5-6.9c-0.6,1-0.9,2.1-0.9,3.3 c0,2.3,1.2,4.3,2.9,5.5c-1.1,0-2.1-0.3-3-0.8c0,0,0,0.1,0,0.1c0,3.2,2.3,5.8,5.3,6.4c-0.6,0.1-1.1,0.2-1.7,0.2c-0.4,0-0.8,0-1.2-0.1 c0.8,2.6,3.3,4.5,6.1,4.6c-2.2,1.8-5.1,2.8-8.2,2.8c-0.5,0-1.1,0-1.6-0.1c2.9,1.9,6.4,2.9,10.1,2.9c12.1,0,18.7-10,18.7-18.7 c0-0.3,0-0.6,0-0.8C46,24.5,47.1,23.4,48,22.1z"/></symbol><symbol id="twitter-unauth-mask" viewBox="0 0 64 64"><path d="M0,0v64h64V0H0z M44.7,25.5c0,0.3,0,0.6,0,0.8C44.7,35,38.1,45,26.1,45c-3.7,0-7.2-1.1-10.1-2.9 c0.5,0.1,1,0.1,1.6,0.1c3.1,0,5.9-1,8.2-2.8c-2.9-0.1-5.3-2-6.1-4.6c0.4,0.1,0.8,0.1,1.2,0.1c0.6,0,1.2-0.1,1.7-0.2 c-3-0.6-5.3-3.3-5.3-6.4c0,0,0-0.1,0-0.1c0.9,0.5,1.9,0.8,3,0.8c-1.8-1.2-2.9-3.2-2.9-5.5c0-1.2,0.3-2.3,0.9-3.3 c3.2,4,8.1,6.6,13.5,6.9c-0.1-0.5-0.2-1-0.2-1.5c0-3.6,2.9-6.6,6.6-6.6c1.9,0,3.6,0.8,4.8,2.1c1.5-0.3,2.9-0.8,4.2-1.6 c-0.5,1.5-1.5,2.8-2.9,3.6c1.3-0.2,2.6-0.5,3.8-1C47.1,23.4,46,24.5,44.7,25.5z"/></symbol></svg> </body> </html>