<!-- CINXE.COM -->

<!-- GitHub - JasonKessler/scattertext: Beautiful visualizations of how language differs among document types. -->

<!DOCTYPE html> <html lang="en" data-color-mode="auto" data-light-theme="light" data-dark-theme="dark" data-a11y-animated-images="system" data-a11y-link-underlines="true" > <head> <meta charset="utf-8"> <link rel="dns-prefetch" href="https://github.githubassets.com"> <link rel="dns-prefetch" href="https://avatars.githubusercontent.com"> <link rel="dns-prefetch" href="https://github-cloud.s3.amazonaws.com"> <link rel="dns-prefetch" href="https://user-images.githubusercontent.com/"> <link rel="preconnect" href="https://github.githubassets.com" crossorigin> <link rel="preconnect" href="https://avatars.githubusercontent.com"> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/light-74231a1f3bbb.css" /><link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/dark-8a995f0bacd4.css" /><link data-color-theme="dark_dimmed" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_dimmed-f37fb7684b1f.css" /><link data-color-theme="dark_high_contrast" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_high_contrast-9ac301c3ebe5.css" /><link data-color-theme="dark_colorblind" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_colorblind-cd826e8636dc.css" /><link data-color-theme="light_colorblind" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/light_colorblind-f91b0f603451.css" /><link data-color-theme="light_high_contrast" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/light_high_contrast-83beb16e0ecf.css" /><link data-color-theme="light_tritanopia" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/light_tritanopia-6e122dab64fc.css" /><link 
data-color-theme="dark_tritanopia" crossorigin="anonymous" media="all" rel="stylesheet" data-href="https://github.githubassets.com/assets/dark_tritanopia-18119e682df0.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-primitives-225433424a87.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-aaa714e5674d.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/global-7eaba1d4847c.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/github-43ae85d4871b.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/repository-4fce88777fa8.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/code-0210be90f4d3.css" /> <script type="application/json" 
id="client-env">{"locale":"en","featureFlags":["a11y_quote_reply_fix","copilot_immersive_issue_preview","copilot_new_references_ui","copilot_chat_repo_custom_instructions_preview","copilot_no_floating_button","copilot_topics_as_references","copilot_read_shared_conversation","copilot_duplicate_thread","copilot_buffered_streaming","dotcom_chat_client_side_skills","experimentation_azure_variant_endpoint","failbot_handle_non_errors","fgpat_form_ui_updates","geojson_azure_maps","ghost_pilot_confidence_truncation_25","ghost_pilot_confidence_truncation_40","github_models_o3_mini_streaming","hovercard_accessibility","insert_before_patch","issues_react_remove_placeholders","issues_react_blur_item_picker_on_close","marketing_pages_search_explore_provider","primer_react_css_modules_ga","react_data_router_pull_requests","remove_child_patch","sample_network_conn_type","swp_enterprise_contact_form","site_proxima_australia_update","viewscreen_sandbox","issues_react_create_milestone","issues_react_cache_fix_workaround","lifecycle_label_name_updates","copilot_task_oriented_assistive_prompts","issues_react_assignee_warning","issue_types_prevent_private_type_creation","refresh_image_video_src","react_router_dispose_on_disconnect","turbo_app_id_restore"]}</script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/wp-runtime-e3a2d17dd2bb.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_oddbird_popover-polyfill_dist_popover_js-9da652f58479.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_arianotify-polyfill_ariaNotify-polyfill_js-node_modules_github_mi-3abb8f-46b9f4874d95.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" 
src="https://github.githubassets.com/assets/ui_packages_failbot_failbot_ts-75968cfb5298.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/environment-f04cb2a9fc8c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_primer_behaviors_dist_esm_index_mjs-0dbb79f97f8f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_selector-observer_dist_index_esm_js-f690fd9ae3d5.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_relative-time-element_dist_index_js-62d275b7ddd9.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_text-expander-element_dist_index_js-78748950cb0c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_auto-complete-element_dist_index_js-node_modules_github_catalyst_-8e9f78-a90ac05d2469.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_filter-input-element_dist_index_js-node_modules_github_remote-inp-b5f1d7-a1760ffda83d.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_markdown-toolbar-element_dist_index_js-ceef33f593fa.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" 
src="https://github.githubassets.com/assets/vendors-node_modules_github_file-attachment-element_dist_index_js-node_modules_primer_view-co-c44a69-8c52cf4cd0d3.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/github-elements-394f8eb34f19.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/element-registry-e0a42d158bcc.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_braintree_browser-detection_dist_browser-detection_js-node_modules_githu-2906d7-2a07a295af40.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_lit-html_lit-html_js-be8cb88f481b.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_index_js-node_modules_morphdom_dist_morphdom-e-7c534c-a4a1922eb55f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_turbo_dist_turbo_es2017-esm_js-a03ee12d659a.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-893f9f-b6294cf703b7.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_color-convert_index_js-e3180fe3bcb3.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" 
src="https://github.githubassets.com/assets/vendors-node_modules_github_quote-selection_dist_index_js-node_modules_github_session-resume_-947061-e7a6c4a19f98.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_updatable-content_updatable-content_ts-2a55124d5c52.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_behaviors_task-list_ts-app_assets_modules_github_sso_ts-ui_packages-900dde-768abe60b1f8.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_sticky-scroll-into-view_ts-3e000c5d31a9.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_behaviors_ajax-error_ts-app_assets_modules_github_behaviors_include-87a4ae-b8865f653f6b.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_behaviors_commenting_edit_ts-app_assets_modules_github_behaviors_ht-83c235-e429cff6ceb1.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/behaviors-7ebb6421bf22.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_delegated-events_dist_index_js-node_modules_github_catalyst_lib_index_js-f6223d90c7ba.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/notifications-global-01e85cd1be94.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" 
src="https://github.githubassets.com/assets/vendors-node_modules_virtualized-list_es_index_js-node_modules_github_template-parts_lib_index_js-94dc7a2157c1.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-70450e-4b93df70b903.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/app_assets_modules_github_ref-selector_ts-3e9d848bab5f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/codespaces-c3bcacfe317c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_filter-input-element_dist_index_js-node_modules_github_remote-inp-3eebbd-0763620ad7bf.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_decorators_js-node_modules_delegated-events_di-e161aa-9d41fb1b6c9e.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_file-attachment-element_dist_index_js-node_modules_github_remote--3c9c82-b71ef90fbdc7.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/repositories-7a0dbaa42c57.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_index_js-node_modules_github_catalyst_lib_inde-dbbea9-26cce2010167.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" 
src="https://github.githubassets.com/assets/code-menu-1c0aedc134b1.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/primer-react-e05a7c4c5398.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/react-core-aaa76995a864.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/react-lib-f1bca44e0926.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/octicons-react-cf2f2ab8dab4.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_emotion_is-prop-valid_dist_emotion-is-prop-valid_esm_js-node_modules_emo-62da9f-2df2f32ec596.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_mini-throttle_dist_index_js-node_modules_stacktrace-parser_dist_s-e7dcdd-9a233856b02c.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_oddbird_popover-polyfill_dist_popover-fn_js-55fea94174bf.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/notifications-subscriptions-menu-58a0c58bfee4.js"></script>
<!-- Module stylesheets for primer-react and the notifications subscriptions menu; each is linked exactly once here. -->
<link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.9df1783473f10f02fb62.module.css" />
<link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/notifications-subscriptions-menu.1bcff9205c241e99cff2.module.css" />
<title>GitHub - JasonKessler/scattertext: Beautiful visualizations of how language differs among document types.</title>
<!-- Route and request metadata emitted as <meta> tags. -->
<meta name="route-pattern" content="/:user_id/:repository" data-turbo-transient>
<meta name="route-controller" content="files" data-turbo-transient>
<meta name="route-action" content="disambiguate" data-turbo-transient>
<meta name="current-catalog-service-hash" content="f3abb0cc802f3d7b95fc8762b94bdcb13bf39634c40c357301c4aa1d67a256fb">
<meta name="request-id" content="8242:F5484:1E8EB8:28350C:67E32BE3" data-pjax-transient="true"/>
<meta name="html-safe-nonce" content="e47f75f249f7e902b6dd610b04ea6dd2c2e1d4bd80253c2021e6816943fd99c6" data-pjax-transient="true"/>
<meta name="visitor-payload" content="eyJyZWZlcnJlciI6IiIsInJlcXVlc3RfaWQiOiI4MjQyOkY1NDg0OjFFOEVCODoyODM1MEM6NjdFMzJCRTMiLCJ2aXNpdG9yX2lkIjoiODM0ODU2MzQ0ODk1NjkyMjg1MSIsInJlZ2lvbl9lZGdlIjoic291dGhlYXN0YXNpYSIsInJlZ2lvbl9yZW5kZXIiOiJzb3V0aGVhc3Rhc2lhIn0=" data-pjax-transient="true"/>
<meta name="visitor-hmac" content="206f13f141a9aacf5d564597414f96857797ca0aa96224a554a73502537e5639" data-pjax-transient="true"/>
<meta name="hovercard-subject-tag" content="repository:63827736" data-turbo-transient>
<meta name="github-keyboard-shortcuts" content="repository,copilot" data-turbo-transient="true" />
<meta name="selected-link" value="repo_source" data-turbo-transient>
<link rel="assets" href="https://github.githubassets.com/">
<meta name="google-site-verification" content="Apib7-x98H0j5cPqHWwSMm6dNU4GmODRoqxLiDzdx9I">
<meta name="octolytics-url" content="https://collector.github.com/github/collect" />
<meta name="analytics-location" content="/&lt;user-name&gt;/&lt;repo-name&gt;" data-turbo-transient="true" />
<meta name="user-login"
content=""> <meta name="viewport" content="width=device-width"> <meta name="description" content="Beautiful visualizations of how language differs among document types. - JasonKessler/scattertext"> <link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="GitHub"> <link rel="fluid-icon" href="https://github.com/fluidicon.png" title="GitHub"> <meta property="fb:app_id" content="1401488693436528"> <meta name="apple-itunes-app" content="app-id=1477376905, app-argument=https://github.com/JasonKessler/scattertext" /> <meta name="twitter:image" content="https://opengraph.githubassets.com/d0dd4512d8d567071154364775c42f92f2194fa93b7dd83afb195f06776c3221/JasonKessler/scattertext" /><meta name="twitter:site" content="@github" /><meta name="twitter:card" content="summary_large_image" /><meta name="twitter:title" content="GitHub - JasonKessler/scattertext: Beautiful visualizations of how language differs among document types." /><meta name="twitter:description" content="Beautiful visualizations of how language differs among document types. - JasonKessler/scattertext" /> <meta property="og:image" content="https://opengraph.githubassets.com/d0dd4512d8d567071154364775c42f92f2194fa93b7dd83afb195f06776c3221/JasonKessler/scattertext" /><meta property="og:image:alt" content="Beautiful visualizations of how language differs among document types. - JasonKessler/scattertext" /><meta property="og:image:width" content="1200" /><meta property="og:image:height" content="600" /><meta property="og:site_name" content="GitHub" /><meta property="og:type" content="object" /><meta property="og:title" content="GitHub - JasonKessler/scattertext: Beautiful visualizations of how language differs among document types." /><meta property="og:url" content="https://github.com/JasonKessler/scattertext" /><meta property="og:description" content="Beautiful visualizations of how language differs among document types. 
- JasonKessler/scattertext" /> <meta name="hostname" content="github.com"> <meta name="expected-hostname" content="github.com"> <meta http-equiv="x-pjax-version" content="21a530f831db6add574b4eba5aa7de6c0c885750770a1c2a9d3c0191c8b346c5" data-turbo-track="reload"> <meta http-equiv="x-pjax-csp-version" content="77190eb53eb47fc30bd2fcc17a7eefa2dfd8505869fee9299ba911be3a40a9eb" data-turbo-track="reload"> <meta http-equiv="x-pjax-css-version" content="1994cd18701e16e6efa87d97f308447f5b0f15b7ae2b58d73f3d026c94bd5edd" data-turbo-track="reload"> <meta http-equiv="x-pjax-js-version" content="83cbdd9b2610955338e649f5263341f6dc31ca4c97d9827ec701a585c3d5cc70" data-turbo-track="reload"> <meta name="turbo-cache-control" content="no-preview" data-turbo-transient=""> <meta data-hydrostats="publish"> <meta name="go-import" content="github.com/JasonKessler/scattertext git https://github.com/JasonKessler/scattertext.git"> <meta name="octolytics-dimension-user_id" content="312924" /><meta name="octolytics-dimension-user_login" content="JasonKessler" /><meta name="octolytics-dimension-repository_id" content="63827736" /><meta name="octolytics-dimension-repository_nwo" content="JasonKessler/scattertext" /><meta name="octolytics-dimension-repository_public" content="true" /><meta name="octolytics-dimension-repository_is_fork" content="false" /><meta name="octolytics-dimension-repository_network_root_id" content="63827736" /><meta name="octolytics-dimension-repository_network_root_nwo" content="JasonKessler/scattertext" /> <link rel="canonical" href="https://github.com/JasonKessler/scattertext" data-turbo-transient> <meta name="turbo-body-classes" content="logged-out env-production page-responsive"> <meta name="browser-stats-url" content="https://api.github.com/_private/browser/stats"> <meta name="browser-errors-url" content="https://api.github.com/_private/browser/errors"> <meta name="release" content="b0319dcb8ec8f1b6326542019e35c3097bc0b29f"> <link rel="mask-icon" 
href="https://github.githubassets.com/assets/pinned-octocat-093da3e6fa40.svg" color="#000000"> <link rel="alternate icon" class="js-site-favicon" type="image/png" href="https://github.githubassets.com/favicons/favicon.png"> <link rel="icon" class="js-site-favicon" type="image/svg+xml" href="https://github.githubassets.com/favicons/favicon.svg" data-base-href="https://github.githubassets.com/favicons/favicon"> <meta name="theme-color" content="#1e2327"> <meta name="color-scheme" content="light dark" /> <link rel="manifest" href="/manifest.json" crossOrigin="use-credentials"> </head> <body class="logged-out env-production page-responsive" style="word-wrap: break-word;"> <div data-turbo-body class="logged-out env-production page-responsive" style="word-wrap: break-word;"> <div class="position-relative header-wrapper js-header-wrapper "> <a href="#start-of-content" data-skip-target-assigned="false" class="px-2 py-4 color-bg-accent-emphasis color-fg-on-emphasis show-on-focus js-skip-to-content">Skip to content</a> <span data-view-component="true" class="progress-pjax-loader Progress position-fixed width-full"> <span style="width: 0%;" data-view-component="true" class="Progress-item progress-pjax-loader-bar left-0 top-0 color-bg-accent-emphasis"></span> </span> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_ui-commands_ui-commands_ts-9fbfacd366dd.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/keyboard-shortcuts-dialog-33dfb803e078.js"></script> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.9df1783473f10f02fb62.module.css" /> <react-partial partial-name="keyboard-shortcuts-dialog" data-ssr="false" data-attempted-ssr="false" > <script type="application/json" 
data-target="react-partial.embeddedData">{"props":{"docsUrl":"https://docs.github.com/get-started/accessibility/keyboard-shortcuts"}}</script> <div data-target="react-partial.reactRoot"></div> </react-partial> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_github_remote-form_dist_index_js-node_modules_delegated-events_dist_inde-94fd67-4898d1bf4b51.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/sessions-730dca81d0a2.js"></script> <header class="HeaderMktg header-logged-out js-details-container js-header Details f4 py-3" role="banner" data-is-top="true" data-color-mode=light data-light-theme=light data-dark-theme=dark> <h2 class="sr-only">Navigation Menu</h2> <button type="button" class="HeaderMktg-backdrop d-lg-none border-0 position-fixed top-0 left-0 width-full height-full js-details-target" aria-label="Toggle navigation"> <span class="d-none">Toggle navigation</span> </button> <div class="d-flex flex-column flex-lg-row flex-items-center px-3 px-md-4 px-lg-5 height-full position-relative z-1"> <div class="d-flex flex-justify-between flex-items-center width-full width-lg-auto"> <div class="flex-1"> <button aria-label="Toggle navigation" aria-expanded="false" type="button" data-view-component="true" class="js-details-target js-nav-padding-recalculate js-header-menu-toggle Button--link Button--medium Button d-lg-none color-fg-inherit p-1"> <span class="Button-content"> <span class="Button-label"><div class="HeaderMenu-toggle-bar rounded my-1"></div> <div class="HeaderMenu-toggle-bar rounded my-1"></div> <div class="HeaderMenu-toggle-bar rounded my-1"></div></span> </span> </button> </div> <a class="mr-lg-3 color-fg-inherit flex-order-2 js-prevent-focus-on-mobile-nav" href="/" aria-label="Homepage" data-analytics-event="{&quot;category&quot;:&quot;Marketing 
nav&quot;,&quot;action&quot;:&quot;click to go to homepage&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Logomark;ref_loc:Header&quot;}"> <svg height="32" aria-hidden="true" viewBox="0 0 24 24" version="1.1" width="32" data-view-component="true" class="octicon octicon-mark-github"> <path d="M12 1C5.9225 1 1 5.9225 1 12C1 16.8675 4.14875 20.9787 8.52125 22.4362C9.07125 22.5325 9.2775 22.2025 9.2775 21.9137C9.2775 21.6525 9.26375 20.7862 9.26375 19.865C6.5 20.3737 5.785 19.1912 5.565 18.5725C5.44125 18.2562 4.905 17.28 4.4375 17.0187C4.0525 16.8125 3.5025 16.3037 4.42375 16.29C5.29 16.2762 5.90875 17.0875 6.115 17.4175C7.105 19.0812 8.68625 18.6137 9.31875 18.325C9.415 17.61 9.70375 17.1287 10.02 16.8537C7.5725 16.5787 5.015 15.63 5.015 11.4225C5.015 10.2262 5.44125 9.23625 6.1425 8.46625C6.0325 8.19125 5.6475 7.06375 6.2525 5.55125C6.2525 5.55125 7.17375 5.2625 9.2775 6.67875C10.1575 6.43125 11.0925 6.3075 12.0275 6.3075C12.9625 6.3075 13.8975 6.43125 14.7775 6.67875C16.8813 5.24875 17.8025 5.55125 17.8025 5.55125C18.4075 7.06375 18.0225 8.19125 17.9125 8.46625C18.6138 9.23625 19.04 10.2125 19.04 11.4225C19.04 15.6437 16.4688 16.5787 14.0213 16.8537C14.42 17.1975 14.7638 17.8575 14.7638 18.8887C14.7638 20.36 14.75 21.5425 14.75 21.9137C14.75 22.2025 14.9563 22.5462 15.5063 22.4362C19.8513 20.9787 23 16.8537 23 12C23 5.9225 18.0775 1 12 1Z"></path> </svg> </a> <div class="flex-1 flex-order-2 text-right"> <a href="/login?return_to=https%3A%2F%2Fgithub.com%2FJasonKessler%2Fscattertext" class="HeaderMenu-link HeaderMenu-button d-inline-flex d-lg-none flex-order-1 f5 no-underline border color-border-default rounded-2 px-2 py-1 color-fg-inherit js-prevent-focus-on-mobile-nav" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header 
menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="74ad727fdaab4f5d669815d6a8739fbf095b6abcba8c6cb0ca4d41a07c410f4c" data-analytics-event="{&quot;category&quot;:&quot;Marketing nav&quot;,&quot;action&quot;:&quot;click to Sign in&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Sign in;ref_loc:Header&quot;}" > Sign in </a> </div> </div> <div class="HeaderMenu js-header-menu height-fit position-lg-relative d-lg-flex flex-column flex-auto top-0"> <div class="HeaderMenu-wrapper d-flex flex-column flex-self-start flex-lg-row flex-auto rounded rounded-lg-0"> <nav class="HeaderMenu-nav" aria-label="Global"> <ul class="d-lg-flex list-style-none"> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Product <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 d-lg-flex flex-wrap dropdown-menu-wide"> <div class="HeaderMenu-column px-lg-4 border-lg-right mb-4 mb-lg-0 pr-lg-7"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline 
position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_copilot&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_copilot_link_product_navbar&quot;}" href="https://github.com/features/copilot"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-copilot color-fg-subtle mr-3"> <path d="M23.922 16.992c-.861 1.495-5.859 5.023-11.922 5.023-6.063 0-11.061-3.528-11.922-5.023A.641.641 0 0 1 0 16.736v-2.869a.841.841 0 0 1 .053-.22c.372-.935 1.347-2.292 2.605-2.656.167-.429.414-1.055.644-1.517a10.195 10.195 0 0 1-.052-1.086c0-1.331.282-2.499 1.132-3.368.397-.406.89-.717 1.474-.952 1.399-1.136 3.392-2.093 6.122-2.093 2.731 0 4.767.957 6.166 2.093.584.235 1.077.546 1.474.952.85.869 1.132 2.037 1.132 3.368 0 .368-.014.733-.052 1.086.23.462.477 1.088.644 1.517 1.258.364 2.233 1.721 2.605 2.656a.832.832 0 0 1 .053.22v2.869a.641.641 0 0 1-.078.256ZM12.172 11h-.344a4.323 4.323 0 0 1-.355.508C10.703 12.455 9.555 13 7.965 13c-1.725 0-2.989-.359-3.782-1.259a2.005 2.005 0 0 1-.085-.104L4 11.741v6.585c1.435.779 4.514 2.179 8 2.179 3.486 0 6.565-1.4 8-2.179v-6.585l-.098-.104s-.033.045-.085.104c-.793.9-2.057 1.259-3.782 1.259-1.59 0-2.738-.545-3.508-1.492a4.323 4.323 0 0 1-.355-.508h-.016.016Zm.641-2.935c.136 1.057.403 1.913.878 2.497.442.544 1.134.938 2.344.938 1.573 0 2.292-.337 2.657-.751.384-.435.558-1.15.558-2.361 0-1.14-.243-1.847-.705-2.319-.477-.488-1.319-.862-2.824-1.025-1.487-.161-2.192.138-2.533.529-.269.307-.437.808-.438 1.578v.021c0 .265.021.562.063.893Zm-1.626 0c.042-.331.063-.628.063-.894v-.02c-.001-.77-.169-1.271-.438-1.578-.341-.391-1.046-.69-2.533-.529-1.505.163-2.347.537-2.824 1.025-.462.472-.705 1.179-.705 2.319 0 1.211.175 1.926.558 2.361.365.414 1.084.751 2.657.751 1.21 0 1.902-.394 
2.344-.938.475-.584.742-1.44.878-2.497Z"></path><path d="M14.5 14.25a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Zm-5 0a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Z"></path> </svg> <div> <div class="color-fg-default h4">GitHub Copilot</div> Write better code with AI </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;security&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;security_link_product_navbar&quot;}" href="https://github.com/features/security"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-shield-check color-fg-subtle mr-3"> <path d="M16.53 9.78a.75.75 0 0 0-1.06-1.06L11 13.19l-1.97-1.97a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l5-5Z"></path><path d="m12.54.637 8.25 2.675A1.75 1.75 0 0 1 22 4.976V10c0 6.19-3.771 10.704-9.401 12.83a1.704 1.704 0 0 1-1.198 0C5.77 20.705 2 16.19 2 10V4.976c0-.758.489-1.43 1.21-1.664L11.46.637a1.748 1.748 0 0 1 1.08 0Zm-.617 1.426-8.25 2.676a.249.249 0 0 0-.173.237V10c0 5.46 3.28 9.483 8.43 11.426a.199.199 0 0 0 .14 0C17.22 19.483 20.5 15.461 20.5 10V4.976a.25.25 0 0 0-.173-.237l-8.25-2.676a.253.253 0 0 0-.154 0Z"></path> </svg> <div> <div class="color-fg-default h4">Security</div> Find and fix vulnerabilities </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;actions&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;actions_link_product_navbar&quot;}" href="https://github.com/features/actions"> <svg 
aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-workflow color-fg-subtle mr-3"> <path d="M1 3a2 2 0 0 1 2-2h6.5a2 2 0 0 1 2 2v6.5a2 2 0 0 1-2 2H7v4.063C7 16.355 7.644 17 8.438 17H12.5v-2.5a2 2 0 0 1 2-2H21a2 2 0 0 1 2 2V21a2 2 0 0 1-2 2h-6.5a2 2 0 0 1-2-2v-2.5H8.437A2.939 2.939 0 0 1 5.5 15.562V11.5H3a2 2 0 0 1-2-2Zm2-.5a.5.5 0 0 0-.5.5v6.5a.5.5 0 0 0 .5.5h6.5a.5.5 0 0 0 .5-.5V3a.5.5 0 0 0-.5-.5ZM14.5 14a.5.5 0 0 0-.5.5V21a.5.5 0 0 0 .5.5H21a.5.5 0 0 0 .5-.5v-6.5a.5.5 0 0 0-.5-.5Z"></path> </svg> <div> <div class="color-fg-default h4">Actions</div> Automate any workflow </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;codespaces&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;codespaces_link_product_navbar&quot;}" href="https://github.com/features/codespaces"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-codespaces color-fg-subtle mr-3"> <path d="M3.5 3.75C3.5 2.784 4.284 2 5.25 2h13.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 18.75 13H5.25a1.75 1.75 0 0 1-1.75-1.75Zm-2 12c0-.966.784-1.75 1.75-1.75h17.5c.966 0 1.75.784 1.75 1.75v4a1.75 1.75 0 0 1-1.75 1.75H3.25a1.75 1.75 0 0 1-1.75-1.75ZM5.25 3.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h13.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Zm-2 12a.25.25 0 0 0-.25.25v4c0 .138.112.25.25.25h17.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25Z"></path><path d="M10 17.75a.75.75 0 0 1 .75-.75h6.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1-.75-.75Zm-4 0a.75.75 0 0 1 .75-.75h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1-.75-.75Z"></path> </svg> <div> <div class="color-fg-default h4">Codespaces</div> Instant 
dev environments </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;issues&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;issues_link_product_navbar&quot;}" href="https://github.com/features/issues"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-issue-opened color-fg-subtle mr-3"> <path d="M12 1c6.075 0 11 4.925 11 11s-4.925 11-11 11S1 18.075 1 12 5.925 1 12 1ZM2.5 12a9.5 9.5 0 0 0 9.5 9.5 9.5 9.5 0 0 0 9.5-9.5A9.5 9.5 0 0 0 12 2.5 9.5 9.5 0 0 0 2.5 12Zm9.5 2a2 2 0 1 1-.001-3.999A2 2 0 0 1 12 14Z"></path> </svg> <div> <div class="color-fg-default h4">Issues</div> Plan and track work </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;code_review&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;code_review_link_product_navbar&quot;}" href="https://github.com/features/code-review"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-code-review color-fg-subtle mr-3"> <path d="M10.3 6.74a.75.75 0 0 1-.04 1.06l-2.908 2.7 2.908 2.7a.75.75 0 1 1-1.02 1.1l-3.5-3.25a.75.75 0 0 1 0-1.1l3.5-3.25a.75.75 0 0 1 1.06.04Zm3.44 1.06a.75.75 0 1 1 1.02-1.1l3.5 3.25a.75.75 0 0 1 0 1.1l-3.5 3.25a.75.75 0 1 1-1.02-1.1l2.908-2.7-2.908-2.7Z"></path><path d="M1.5 4.25c0-.966.784-1.75 1.75-1.75h17.5c.966 0 1.75.784 1.75 1.75v12.5a1.75 1.75 0 0 1-1.75 1.75h-9.69l-3.573 3.573A1.458 1.458 0 0 1 5 
21.043V18.5H3.25a1.75 1.75 0 0 1-1.75-1.75ZM3.25 4a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h2.5a.75.75 0 0 1 .75.75v3.19l3.72-3.72a.749.749 0 0 1 .53-.22h10a.25.25 0 0 0 .25-.25V4.25a.25.25 0 0 0-.25-.25Z"></path> </svg> <div> <div class="color-fg-default h4">Code Review</div> Manage code changes </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;discussions&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;discussions_link_product_navbar&quot;}" href="https://github.com/features/discussions"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-comment-discussion color-fg-subtle mr-3"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z"></path><path d="M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 .25-.25v-9.5Z"></path> </svg> <div> <div class="color-fg-default h4">Discussions</div> Collaborate outside of code </div> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" 
data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;code_search&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;code_search_link_product_navbar&quot;}" href="https://github.com/features/code-search">
              <!-- Decorative icon; the visible label below names the link. -->
              <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-code-square color-fg-subtle mr-3">
                <path d="M10.3 8.24a.75.75 0 0 1-.04 1.06L7.352 12l2.908 2.7a.75.75 0 1 1-1.02 1.1l-3.5-3.25a.75.75 0 0 1 0-1.1l3.5-3.25a.75.75 0 0 1 1.06.04Zm3.44 1.06a.75.75 0 1 1 1.02-1.1l3.5 3.25a.75.75 0 0 1 0 1.1l-3.5 3.25a.75.75 0 1 1-1.02-1.1l2.908-2.7-2.908-2.7Z"></path>
                <path d="M2 3.75C2 2.784 2.784 2 3.75 2h16.5c.966 0 1.75.784 1.75 1.75v16.5A1.75 1.75 0 0 1 20.25 22H3.75A1.75 1.75 0 0 1 2 20.25Zm1.75-.25a.25.25 0 0 0-.25.25v16.5c0 .138.112.25.25.25h16.5a.25.25 0 0 0 .25-.25V3.75a.25.25 0 0 0-.25-.25Z"></path>
              </svg>
              <div>
                <div class="color-fg-default h4">Code Search</div>
                Find more, search less
              </div>
            </a></li>
        </ul>
      </div>
    </div>
    <!-- Product menu, "Explore" column: a short list of general links,
         labelled for assistive technology by the heading span below. -->
    <div class="HeaderMenu-column px-lg-4">
      <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0 border-bottom-0">
        <span class="d-block h4 color-fg-default my-1" id="product-explore-heading">Explore</span>
        <ul class="list-style-none f5" aria-labelledby="product-explore-heading">
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;all_features&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;all_features_link_product_navbar&quot;}" href="https://github.com/features">
              All features
            </a></li>
          <!-- The three entries below open in a new tab (target="_blank").
               rel="noopener" is set explicitly so the opened page never gets a
               window.opener handle, even in browsers that do not imply it.
               "noreferrer" is intentionally not added: it would also strip the
               Referer header, which is a behavior change beyond this fix. -->
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" rel="noopener" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;documentation&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;documentation_link_product_navbar&quot;}" href="https://docs.github.com">
              Documentation
              <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle">
                <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path>
              </svg>
            </a></li>
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" rel="noopener" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_skills&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_skills_link_product_navbar&quot;}" href="https://skills.github.com">
              GitHub Skills
              <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle">
                <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path>
              </svg>
            </a></li>
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" rel="noopener" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;blog&quot;,&quot;context&quot;:&quot;product&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;blog_link_product_navbar&quot;}" href="https://github.blog">
              Blog
              <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle">
                <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path>
              </svg>
            </a></li>
        </ul>
      </div>
    </div>
  </div>
</li>
<!-- "Solutions" top-level item: a disclosure button followed by its dropdown
     panel. aria-expanded is rendered as "false" in the static markup. -->
<li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item">
  <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false">
    Solutions
    <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1">
      <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path>
    </svg>
  </button>
  <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 d-lg-flex flex-wrap dropdown-menu-wide">
    <div class="HeaderMenu-column px-lg-4 border-lg-right mb-4 
mb-lg-0 pr-lg-7"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0 pb-lg-3 mb-3 mb-lg-0"> <span class="d-block h4 color-fg-default my-1" id="solutions-by-company-size-heading">By company size</span> <ul class="list-style-none f5" aria-labelledby="solutions-by-company-size-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;enterprises&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;enterprises_link_solutions_navbar&quot;}" href="https://github.com/enterprise"> Enterprises </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;small_and_medium_teams&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;small_and_medium_teams_link_solutions_navbar&quot;}" href="https://github.com/team"> Small and medium teams </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;startups&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;startups_link_solutions_navbar&quot;}" href="https://github.com/enterprise/startups"> Startups </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;nonprofits&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;nonprofits_link_solutions_navbar&quot;}" href="/solutions/industry/nonprofits"> Nonprofits </a></li> </ul> </div> <div class="border-bottom pb-3 
pb-lg-0 border-lg-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="solutions-by-use-case-heading">By use case</span> <ul class="list-style-none f5" aria-labelledby="solutions-by-use-case-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;devsecops&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;devsecops_link_solutions_navbar&quot;}" href="/solutions/use-case/devsecops"> DevSecOps </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;devops&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;devops_link_solutions_navbar&quot;}" href="/solutions/use-case/devops"> DevOps </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;ci_cd&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;ci_cd_link_solutions_navbar&quot;}" href="/solutions/use-case/ci-cd"> CI/CD </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;view_all_use_cases&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;view_all_use_cases_link_solutions_navbar&quot;}" href="/solutions/use-case"> View all use cases </a></li> </ul> </div> </div> <div class="HeaderMenu-column px-lg-4"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <span class="d-block h4 color-fg-default my-1" 
id="solutions-by-industry-heading">By industry</span> <ul class="list-style-none f5" aria-labelledby="solutions-by-industry-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;healthcare&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;healthcare_link_solutions_navbar&quot;}" href="/solutions/industry/healthcare"> Healthcare </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;financial_services&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;financial_services_link_solutions_navbar&quot;}" href="/solutions/industry/financial-services"> Financial services </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;manufacturing&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;manufacturing_link_solutions_navbar&quot;}" href="/solutions/industry/manufacturing"> Manufacturing </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;government&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;government_link_solutions_navbar&quot;}" href="/solutions/industry/government"> Government </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" 
data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;view_all_industries&quot;,&quot;context&quot;:&quot;solutions&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;view_all_industries_link_solutions_navbar&quot;}" href="/solutions/industry"> View all industries </a></li> </ul> </div> </div> <div class="HeaderMenu-trailing-link rounded-bottom-2 flex-shrink-0 mt-lg-4 px-lg-4 py-4 py-lg-3 f5 text-semibold"> <a href="/solutions"> View all solutions <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-right HeaderMenu-trailing-link-icon"> <path d="M6.22 3.22a.75.75 0 0 1 1.06 0l4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L9.94 8 6.22 4.28a.75.75 0 0 1 0-1.06Z"></path> </svg> </a> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Resources <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 d-lg-flex flex-wrap dropdown-menu-wide"> <div class="HeaderMenu-column px-lg-4 border-lg-right mb-4 mb-lg-0 pr-lg-7"> <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0"> <span class="d-block h4 
color-fg-default my-1" id="resources-topics-heading">Topics</span> <ul class="list-style-none f5" aria-labelledby="resources-topics-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;ai&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;ai_link_resources_navbar&quot;}" href="/resources/articles/ai"> AI </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;devops&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;devops_link_resources_navbar&quot;}" href="/resources/articles/devops"> DevOps </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;security&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;security_link_resources_navbar&quot;}" href="/resources/articles/security"> Security </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;software_development&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;software_development_link_resources_navbar&quot;}" href="/resources/articles/software-development"> Software Development </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" 
data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;view_all&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;view_all_link_resources_navbar&quot;}" href="/resources/articles">
              View all
            </a></li>
        </ul>
      </div>
    </div>
    <!-- Resources menu, "Explore" column: labelled for assistive technology
         by the heading span below. -->
    <div class="HeaderMenu-column px-lg-4">
      <div class="border-bottom pb-3 pb-lg-0 border-lg-bottom-0 border-bottom-0">
        <span class="d-block h4 color-fg-default my-1" id="resources-explore-heading">Explore</span>
        <ul class="list-style-none f5" aria-labelledby="resources-explore-heading">
          <!-- Entries that open in a new tab (target="_blank") carry an
               explicit rel="noopener" so the opened page never gets a
               window.opener handle, even in browsers that do not imply it.
               "noreferrer" is intentionally not added: it would also strip the
               Referer header, which is a behavior change beyond this fix. -->
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" rel="noopener" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;learning_pathways&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;learning_pathways_link_resources_navbar&quot;}" href="https://resources.github.com/learn/pathways">
              Learning Pathways
              <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle">
                <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path>
              </svg>
            </a></li>
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" rel="noopener" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;events_amp_webinars&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;events_amp_webinars_link_resources_navbar&quot;}" href="https://resources.github.com">
              Events &amp; Webinars
              <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle">
                <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path>
              </svg>
            </a></li>
          <!-- Same-tab entries: no target attribute, so no rel is needed. -->
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;ebooks_amp_whitepapers&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;ebooks_amp_whitepapers_link_resources_navbar&quot;}" href="https://github.com/resources/whitepapers">
              Ebooks &amp; Whitepapers
            </a></li>
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;customer_stories&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;customer_stories_link_resources_navbar&quot;}" href="https://github.com/customer-stories">
              Customer Stories
            </a></li>
          <!-- New-tab entry; the rest of this anchor's attributes follow on
               the next source line. -->
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary Link--external" target="_blank" rel="noopener" 
data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;partners&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;partners_link_resources_navbar&quot;}" href="https://partner.github.com"> Partners <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-link-external HeaderMenu-external-icon color-fg-subtle"> <path d="M3.75 2h3.5a.75.75 0 0 1 0 1.5h-3.5a.25.25 0 0 0-.25.25v8.5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-3.5a.75.75 0 0 1 1.5 0v3.5A1.75 1.75 0 0 1 12.25 14h-8.5A1.75 1.75 0 0 1 2 12.25v-8.5C2 2.784 2.784 2 3.75 2Zm6.854-1h4.146a.25.25 0 0 1 .25.25v4.146a.25.25 0 0 1-.427.177L13.03 4.03 9.28 7.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.75-3.75-1.543-1.543A.25.25 0 0 1 10.604 1Z"></path> </svg> </a></li> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;executive_insights&quot;,&quot;context&quot;:&quot;resources&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;executive_insights_link_resources_navbar&quot;}" href="https://github.com/solutions/executive-insights"> Executive Insights </a></li> </ul> </div> </div> </div> </li> <li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item"> <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false"> Open Source <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1"> <path d="M12.78 5.22a.749.749 0 0 1 
0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path> </svg> </button> <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 px-lg-4"> <div class="HeaderMenu-column"> <div class="border-bottom pb-3 pb-lg-0 pb-lg-3 mb-3 mb-lg-0 mb-lg-3"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;github_sponsors&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;github_sponsors_link_open_source_navbar&quot;}" href="/sponsors"> <div> <div class="color-fg-default h4">GitHub Sponsors</div> Fund open source developers </div> </a></li> </ul> </div> <div class="border-bottom pb-3 pb-lg-0 pb-lg-3 mb-3 mb-lg-0 mb-lg-3"> <ul class="list-style-none f5" > <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;the_readme_project&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;the_readme_project_link_open_source_navbar&quot;}" href="https://github.com/readme"> <div> <div class="color-fg-default h4">The ReadME Project</div> GitHub community articles </div> </a></li> </ul> </div> <div class="border-bottom pb-3 pb-lg-0 border-bottom-0"> <span class="d-block h4 color-fg-default my-1" id="open-source-repositories-heading">Repositories</span> <ul class="list-style-none f5" aria-labelledby="open-source-repositories-heading"> <li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" 
data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;topics&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;topics_link_open_source_navbar&quot;}" href="https://github.com/topics"> Topics </a></li>
<li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;trending&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;trending_link_open_source_navbar&quot;}" href="https://github.com/trending"> Trending </a></li>
<li> <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;collections&quot;,&quot;context&quot;:&quot;open_source&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;collections_link_open_source_navbar&quot;}" href="https://github.com/collections"> Collections </a></li>
</ul> </div> </div> </div> </li>
<!-- Enterprise menu item: a button (aria-expanded="false" initially) followed by its HeaderMenu-dropdown panel. Every link in the panel uses an absolute https://github.com URL, like the neighbouring menu entries, so it keeps pointing at GitHub when this markup is served from another origin. -->
<li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item">
  <button type="button" class="HeaderMenu-link border-0 width-full width-lg-auto px-0 px-lg-2 py-lg-2 no-wrap d-flex flex-items-center flex-justify-between js-details-target" aria-expanded="false">
    Enterprise
    <svg opacity="0.5" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-chevron-down HeaderMenu-icon ml-1">
      <path d="M12.78 5.22a.749.749 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.06 0L3.22 6.28a.749.749 0 1 1 1.06-1.06L8 8.939l3.72-3.719a.749.749 0 0 1 1.06 0Z"></path>
    </svg>
  </button>
  <div class="HeaderMenu-dropdown dropdown-menu rounded m-0 p-0 pt-2 pt-lg-4 position-relative position-lg-absolute left-0 left-lg-n3 pb-2 pb-lg-4 px-lg-4">
    <div class="HeaderMenu-column">
      <!-- Primary entry of the Enterprise menu. -->
      <div class="border-bottom pb-3 pb-lg-0 pb-lg-3 mb-3 mb-lg-0 mb-lg-3">
        <ul class="list-style-none f5" >
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;enterprise_platform&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;enterprise_platform_link_enterprise_navbar&quot;}" href="https://github.com/enterprise">
              <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-stack color-fg-subtle mr-3">
                <path d="M11.063 1.456a1.749 1.749 0 0 1 1.874 0l8.383 5.316a1.751 1.751 0 0 1 0 2.956l-8.383 5.316a1.749 1.749 0 0 1-1.874 0L2.68 9.728a1.751 1.751 0 0 1 0-2.956Zm1.071 1.267a.25.25 0 0 0-.268 0L3.483 8.039a.25.25 0 0 0 0 .422l8.383 5.316a.25.25 0 0 0 .268 0l8.383-5.316a.25.25 0 0 0 0-.422Z"></path><path d="M1.867 12.324a.75.75 0 0 1 1.035-.232l8.964 5.685a.25.25 0 0 0 .268 0l8.964-5.685a.75.75 0 0 1 .804 1.267l-8.965 5.685a1.749 1.749 0 0 1-1.874 0l-8.965-5.685a.75.75 0 0 1-.231-1.035Z"></path><path d="M1.867 16.324a.75.75 0 0 1 1.035-.232l8.964 5.685a.25.25 0 0 0 .268 0l8.964-5.685a.75.75 0 0 1 .804 1.267l-8.965 5.685a1.749 1.749 0 0 1-1.874 0l-8.965-5.685a.75.75 0 0 1-.231-1.035Z"></path>
              </svg>
              <div>
                <div class="color-fg-default h4">Enterprise platform</div>
                AI-powered developer platform
              </div>
            </a></li>
        </ul>
      </div>
      <!-- Add-on links; the list takes its accessible name from the heading span through aria-labelledby. -->
      <div class="border-bottom pb-3 pb-lg-0 border-bottom-0">
        <span class="d-block h4 color-fg-default my-1" id="enterprise-available-add-ons-heading">Available add-ons</span>
        <ul class="list-style-none f5" aria-labelledby="enterprise-available-add-ons-heading">
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;advanced_security&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;advanced_security_link_enterprise_navbar&quot;}" href="https://github.com/enterprise/advanced-security">
              <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-shield-check color-fg-subtle mr-3">
                <path d="M16.53 9.78a.75.75 0 0 0-1.06-1.06L11 13.19l-1.97-1.97a.75.75 0 0 0-1.06 1.06l2.5 2.5a.75.75 0 0 0 1.06 0l5-5Z"></path><path d="m12.54.637 8.25 2.675A1.75 1.75 0 0 1 22 4.976V10c0 6.19-3.771 10.704-9.401 12.83a1.704 1.704 0 0 1-1.198 0C5.77 20.705 2 16.19 2 10V4.976c0-.758.489-1.43 1.21-1.664L11.46.637a1.748 1.748 0 0 1 1.08 0Zm-.617 1.426-8.25 2.676a.249.249 0 0 0-.173.237V10c0 5.46 3.28 9.483 8.43 11.426a.199.199 0 0 0 .14 0C17.22 19.483 20.5 15.461 20.5 10V4.976a.25.25 0 0 0-.173-.237l-8.25-2.676a.253.253 0 0 0-.154 0Z"></path>
              </svg>
              <div>
                <div class="color-fg-default h4">Advanced Security</div>
                Enterprise-grade security features
              </div>
            </a></li>
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description pb-lg-3" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;copilot_for_business&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;copilot_for_business_link_enterprise_navbar&quot;}" href="https://github.com/features/copilot/copilot-business">
              <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-copilot color-fg-subtle mr-3">
                <path d="M23.922 16.992c-.861 1.495-5.859 5.023-11.922 5.023-6.063 0-11.061-3.528-11.922-5.023A.641.641 0 0 1 0 16.736v-2.869a.841.841 0 0 1 .053-.22c.372-.935 1.347-2.292 2.605-2.656.167-.429.414-1.055.644-1.517a10.195 10.195 0 0 1-.052-1.086c0-1.331.282-2.499 1.132-3.368.397-.406.89-.717 1.474-.952 1.399-1.136 3.392-2.093 6.122-2.093 2.731 0 4.767.957 6.166 2.093.584.235 1.077.546 1.474.952.85.869 1.132 2.037 1.132 3.368 0 .368-.014.733-.052 1.086.23.462.477 1.088.644 1.517 1.258.364 2.233 1.721 2.605 2.656a.832.832 0 0 1 .053.22v2.869a.641.641 0 0 1-.078.256ZM12.172 11h-.344a4.323 4.323 0 0 1-.355.508C10.703 12.455 9.555 13 7.965 13c-1.725 0-2.989-.359-3.782-1.259a2.005 2.005 0 0 1-.085-.104L4 11.741v6.585c1.435.779 4.514 2.179 8 2.179 3.486 0 6.565-1.4 8-2.179v-6.585l-.098-.104s-.033.045-.085.104c-.793.9-2.057 1.259-3.782 1.259-1.59 0-2.738-.545-3.508-1.492a4.323 4.323 0 0 1-.355-.508h-.016.016Zm.641-2.935c.136 1.057.403 1.913.878 2.497.442.544 1.134.938 2.344.938 1.573 0 2.292-.337 2.657-.751.384-.435.558-1.15.558-2.361 0-1.14-.243-1.847-.705-2.319-.477-.488-1.319-.862-2.824-1.025-1.487-.161-2.192.138-2.533.529-.269.307-.437.808-.438 1.578v.021c0 .265.021.562.063.893Zm-1.626 0c.042-.331.063-.628.063-.894v-.02c-.001-.77-.169-1.271-.438-1.578-.341-.391-1.046-.69-2.533-.529-1.505.163-2.347.537-2.824 1.025-.462.472-.705 1.179-.705 2.319 0 1.211.175 1.926.558 2.361.365.414 1.084.751 2.657.751 1.21 0 1.902-.394 2.344-.938.475-.584.742-1.44.878-2.497Z"></path><path d="M14.5 14.25a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Zm-5 0a1 1 0 0 1 1 1v2a1 1 0 0 1-2 0v-2a1 1 0 0 1 1-1Z"></path>
              </svg>
              <div>
                <div class="color-fg-default h4">Copilot for business</div>
                Enterprise-grade AI features
              </div>
            </a></li>
          <li>
            <a class="HeaderMenu-dropdown-link d-block no-underline position-relative py-2 Link--secondary d-flex flex-items-center Link--has-description" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;premium_support&quot;,&quot;context&quot;:&quot;enterprise&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;premium_support_link_enterprise_navbar&quot;}" href="https://github.com/premium-support">
              <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-comment-discussion color-fg-subtle mr-3">
                <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 14.25 14H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 15.543V14H1.75A1.75 1.75 0 0 1 0 12.25v-9.5C0 1.784.784 1 1.75 1ZM1.5 2.75v9.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-9.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Z"></path><path d="M22.5 8.75a.25.25 0 0 0-.25-.25h-3.5a.75.75 0 0 1 0-1.5h3.5c.966 0 1.75.784 1.75 1.75v9.5A1.75 1.75 0 0 1 22.25 20H21v1.543a1.457 1.457 0 0 1-2.487 1.03L15.939 20H10.75A1.75 1.75 0 0 1 9 18.25v-1.465a.75.75 0 0 1 1.5 0v1.465c0 .138.112.25.25.25h5.5a.75.75 0 0 1 .53.22l2.72 2.72v-2.19a.75.75 0 0 1 .75-.75h2a.25.25 0 0 0 .25-.25v-9.5Z"></path>
              </svg>
              <div>
                <div class="color-fg-default h4">Premium Support</div>
                Enterprise-grade 24/7 support
              </div>
            </a></li>
        </ul>
      </div>
    </div>
  </div>
</li>
<li class="HeaderMenu-item position-relative flex-wrap flex-justify-between flex-items-center d-block d-lg-flex flex-lg-nowrap flex-lg-items-center js-details-container js-header-menu-item">
  <a class="HeaderMenu-link no-underline px-0 px-lg-2 py-3 py-lg-2 d-block d-lg-inline-block" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;pricing&quot;,&quot;context&quot;:&quot;global&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;pricing_link_global_navbar&quot;}" href="https://github.com/pricing">Pricing</a>
</li>
</ul> </nav>
<div class="d-flex flex-column flex-lg-row width-full flex-justify-end flex-lg-items-center text-center mt-3 mt-lg-0 text-lg-left ml-lg-3">
  <qbsearch-input class="search-input" data-scope="repo:JasonKessler/scattertext" data-custom-scopes-path="/search/custom_scopes"
data-delete-custom-scopes-csrf="AmMlmERF3Cp68MZQUu3ue9EgQz9FWLI8LiXgUMTUXMnCmos4645g6v_74znbketb3U51OgPwIXj5Y6Na9dVmNQ" data-max-custom-scopes="10" data-header-redesign-enabled="false" data-initial-value="" data-blackbird-suggestions-path="/search/suggestions" data-jump-to-suggestions-path="/_graphql/GetSuggestedNavigationDestinations" data-current-repository="JasonKessler/scattertext" data-current-org="" data-current-owner="JasonKessler" data-logged-in="false" data-copilot-chat-enabled="false" data-nl-search-enabled="false" data-retain-scroll-position="true">
  <!-- Collapsed search trigger. The data-action attributes on the container and on the button name the qbsearch-input handlers that receive their click events. -->
  <div class="search-input-container search-with-dialog position-relative d-flex flex-row flex-items-center mr-4 rounded" data-action="click:qbsearch-input#searchInputContainerClicked" >
    <!-- data-hotkey is quoted so that its value, which ends in a slash, cannot be misread as tag syntax by tools that rewrite this markup. NOTE(review): placeholder is not a standard button attribute; it is kept because the component may read it as the fallback button text (confirm before removing). -->
    <button type="button" class="header-search-button placeholder input-button form-control d-flex flex-1 flex-self-stretch flex-items-center no-wrap width-full py-0 pl-2 pr-0 text-left border-0 box-shadow-none" data-target="qbsearch-input.inputButton" aria-label="Search or jump to…" aria-haspopup="dialog" placeholder="Search or jump to..." data-hotkey="s,/" autocapitalize="off" data-analytics-event="{&quot;location&quot;:&quot;navbar&quot;,&quot;action&quot;:&quot;searchbar&quot;,&quot;context&quot;:&quot;global&quot;,&quot;tag&quot;:&quot;input&quot;,&quot;label&quot;:&quot;searchbar_input_global_navbar&quot;}" data-action="click:qbsearch-input#handleExpand" >
      <div class="mr-2 color-fg-muted">
        <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search">
          <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path>
        </svg>
      </div>
      <span class="flex-1" data-target="qbsearch-input.inputButtonText">Search or jump to...</span>
      <div class="d-flex" data-target="qbsearch-input.hotkeyIndicator">
        <svg xmlns="http://www.w3.org/2000/svg" width="22" height="20" aria-hidden="true" class="mr-1"><path fill="none" stroke="#979A9C" opacity=".4" d="M3.5.5h12c1.7 0 3 1.3 3 3v13c0 1.7-1.3 3-3 3h-12c-1.7 0-3-1.3-3-3v-13c0-1.7 1.3-3 3-3z"></path><path fill="#979A9C" d="M11.8 6L8 15.1h-.9L10.8 6h1z"></path></svg>
      </div>
    </button>
    <input type="hidden" name="type" class="js-site-search-type-field">
    <!-- Suggestions dialog; aria-labelledby points at the visually hidden (sr-only) heading inside it. -->
    <div class="Overlay--hidden " data-modal-dialog-overlay>
      <modal-dialog data-action="close:qbsearch-input#handleClose cancel:qbsearch-input#handleClose" data-target="qbsearch-input.searchSuggestionsDialog" role="dialog" id="search-suggestions-dialog" aria-modal="true" aria-labelledby="search-suggestions-dialog-header" data-view-component="true" class="Overlay Overlay--width-large Overlay--height-auto">
        <h1 id="search-suggestions-dialog-header" class="sr-only">Search code, repositories, users, issues, pull requests...</h1>
        <div class="Overlay-body Overlay-body--paddingNone">
          <div data-view-component="true">
            <div class="search-suggestions position-fixed width-full color-shadow-large border color-fg-default color-bg-default
overflow-hidden d-flex flex-column query-builder-container" style="border-radius: 12px;" data-target="qbsearch-input.queryBuilderContainer" hidden >
  <!-- NOTE(review): the two comments and the stray option/form end tags that come next look like a deliberate guard that terminates any still-open quote, textarea, xmp, option or form context before the real form starts; they are kept exactly as they were. The form itself carries no action attribute, so it targets the current document URL, which is what the previous empty action value resolved to. -->
  <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="query-builder-test-form" accept-charset="UTF-8" method="get">
    <query-builder data-target="qbsearch-input.queryBuilder" id="query-builder-query-builder-test" data-filter-key=":" data-view-component="true" class="QueryBuilder search-query-builder">
      <div class="FormControl FormControl--fullWidth">
        <label id="query-builder-test-label" for="query-builder-test" class="FormControl-label sr-only"> Search </label>
        <div class="QueryBuilder-StyledInput width-fit " data-target="query-builder.styledInput" >
          <span id="query-builder-test-leadingvisual-wrap" class="FormControl-input-leadingVisualWrap QueryBuilder-leadingVisualWrap">
            <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search FormControl-input-leadingVisual">
              <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path>
            </svg>
          </span>
          <div data-target="query-builder.styledInputContainer" class="QueryBuilder-StyledInputContainer">
            <div aria-hidden="true" class="QueryBuilder-StyledInputContent" data-target="query-builder.styledInputContent" ></div>
            <div class="QueryBuilder-InputWrapper">
              <div aria-hidden="true" class="QueryBuilder-Sizer" data-target="query-builder.sizer"></div>
              <!-- Search field exposed as a combobox; its data-action attribute names the query-builder handlers for input, blur, keydown and focus. -->
              <input id="query-builder-test" name="query-builder-test" value="" autocomplete="off" type="text" role="combobox" spellcheck="false" aria-expanded="false" aria-describedby="validation-e49bee3b-273b-45f0-a2b8-ec1a3c1520a1" data-target="query-builder.input" data-action=" input:query-builder#inputChange blur:query-builder#inputBlur keydown:query-builder#inputKeydown focus:query-builder#inputFocus " data-view-component="true" class="FormControl-input QueryBuilder-Input FormControl-medium">
            </div>
          </div>
          <span class="sr-only" id="query-builder-test-clear">Clear</span>
          <!-- Clear button: icon only, so its accessible name comes from aria-labelledby (the "Clear" span plus the field label). The native button element already has the button role, so no explicit role is set; it starts out hidden. -->
          <button id="query-builder-test-clear-button" aria-labelledby="query-builder-test-clear query-builder-test-label" data-target="query-builder.clearButton" data-action=" click:query-builder#clear focus:query-builder#clearButtonFocus blur:query-builder#clearButtonBlur " variant="small" hidden type="button" data-view-component="true" class="Button Button--iconOnly Button--invisible Button--medium mr-1 px-2 py-0 d-flex flex-items-center rounded-1 color-fg-muted">
            <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x-circle-fill Button-visual">
              <path d="M2.343 13.657A8 8 0 1 1 13.658 2.343 8 8 0 0 1 2.343 13.657ZM6.03 4.97a.751.751 0 0 0-1.042.018.751.751 0 0 0-.018 1.042L6.94 8 4.97 9.97a.749.749 0 0 0 .326 1.275.749.749 0 0 0 .734-.215L8 9.06l1.97 1.97a.749.749 0 0 0 1.275-.326.749.749 0 0 0-.215-.734L9.06 8l1.97-1.97a.749.749 0 0 0-.326-1.275.749.749 0 0 0-.734.215L8 6.94Z"></path>
            </svg>
          </button>
        </div>
        <!-- Icon templates, each addressed by its id. -->
        <template id="search-icon">
          <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-search">
            <path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path>
          </svg>
        </template>
        <template id="code-icon">
          <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code">
            <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72
3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </template> <template id="file-code-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-file-code"> <path d="M4 1.75C4 .784 4.784 0 5.75 0h5.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v8.586A1.75 1.75 0 0 1 14.25 15h-9a.75.75 0 0 1 0-1.5h9a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 10 4.25V1.5H5.75a.25.25 0 0 0-.25.25v2.5a.75.75 0 0 1-1.5 0Zm1.72 4.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734l1.47-1.47-1.47-1.47a.75.75 0 0 1 0-1.06ZM3.28 7.78 1.81 9.25l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Zm8.22-6.218V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path> </svg> </template> <template id="history-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-history"> <path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path> </svg> </template> <template id="repo-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo"> <path d="M2 2.5A2.5 2.5 0 0 1 4.5 0h8.75a.75.75 0 0 1 .75.75v12.5a.75.75 0 0 1-.75.75h-2.5a.75.75 0 0 1 0-1.5h1.75v-2h-8a1 1 0 0 0-.714 1.7.75.75 0 1 1-1.072 1.05A2.495 2.495 0 0 1 2 11.5Zm10.5-1h-8a1 1 0 0 0-1 1v6.708A2.486 2.486 0 0 1 4.5 9h8ZM5 12.25a.25.25 0 0 1 .25-.25h3.5a.25.25 0 0 1 .25.25v3.25a.25.25 0 0 1-.4.2l-1.45-1.087a.249.249 0 0 
0-.3 0L5.4 15.7a.25.25 0 0 1-.4-.2Z"></path> </svg> </template> <template id="bookmark-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bookmark"> <path d="M3 2.75C3 1.784 3.784 1 4.75 1h6.5c.966 0 1.75.784 1.75 1.75v11.5a.75.75 0 0 1-1.227.579L8 11.722l-3.773 3.107A.751.751 0 0 1 3 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v9.91l3.023-2.489a.75.75 0 0 1 .954 0l3.023 2.49V2.75a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="plus-circle-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-plus-circle"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm7.25-3.25v2.5h2.5a.75.75 0 0 1 0 1.5h-2.5v2.5a.75.75 0 0 1-1.5 0v-2.5h-2.5a.75.75 0 0 1 0-1.5h2.5v-2.5a.75.75 0 0 1 1.5 0Z"></path> </svg> </template> <template id="circle-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> </template> <template id="trash-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-trash"> <path d="M11 1.75V3h2.25a.75.75 0 0 1 0 1.5H2.75a.75.75 0 0 1 0-1.5H5V1.75C5 .784 5.784 0 6.75 0h2.5C10.216 0 11 .784 11 1.75ZM4.496 6.675l.66 6.6a.25.25 0 0 0 .249.225h5.19a.25.25 0 0 0 .249-.225l.66-6.6a.75.75 0 0 1 1.492.149l-.66 6.6A1.748 1.748 0 0 1 10.595 15h-5.19a1.75 1.75 0 0 1-1.741-1.575l-.66-6.6a.75.75 0 1 1 1.492-.15ZM6.5 1.75V3h3V1.75a.25.25 0 0 0-.25-.25h-2.5a.25.25 0 0 0-.25.25Z"></path> </svg> </template> <template id="team-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-people"> <path d="M2 5.5a3.5 3.5 0 1 1 5.898 2.549 5.508 5.508 0 0 1 3.034 
4.084.75.75 0 1 1-1.482.235 4 4 0 0 0-7.9 0 .75.75 0 0 1-1.482-.236A5.507 5.507 0 0 1 3.102 8.05 3.493 3.493 0 0 1 2 5.5ZM11 4a3.001 3.001 0 0 1 2.22 5.018 5.01 5.01 0 0 1 2.56 3.012.749.749 0 0 1-.885.954.752.752 0 0 1-.549-.514 3.507 3.507 0 0 0-2.522-2.372.75.75 0 0 1-.574-.73v-.352a.75.75 0 0 1 .416-.672A1.5 1.5 0 0 0 11 5.5.75.75 0 0 1 11 4Zm-5.5-.5a2 2 0 1 0-.001 3.999A2 2 0 0 0 5.5 3.5Z"></path> </svg> </template> <template id="project-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-project"> <path d="M1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25V1.75C0 .784.784 0 1.75 0ZM1.5 1.75v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25ZM11.75 3a.75.75 0 0 1 .75.75v7.5a.75.75 0 0 1-1.5 0v-7.5a.75.75 0 0 1 .75-.75Zm-8.25.75a.75.75 0 0 1 1.5 0v5.5a.75.75 0 0 1-1.5 0ZM8 3a.75.75 0 0 1 .75.75v3.5a.75.75 0 0 1-1.5 0v-3.5A.75.75 0 0 1 8 3Z"></path> </svg> </template> <template id="pencil-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pencil"> <path d="M11.013 1.427a1.75 1.75 0 0 1 2.474 0l1.086 1.086a1.75 1.75 0 0 1 0 2.474l-8.61 8.61c-.21.21-.47.364-.756.445l-3.251.93a.75.75 0 0 1-.927-.928l.929-3.25c.081-.286.235-.547.445-.758l8.61-8.61Zm.176 4.823L9.75 4.81l-6.286 6.287a.253.253 0 0 0-.064.108l-.558 1.953 1.953-.558a.253.253 0 0 0 .108-.064Zm1.238-3.763a.25.25 0 0 0-.354 0L10.811 3.75l1.439 1.44 1.263-1.263a.25.25 0 0 0 0-.354Z"></path> </svg> </template> <template id="copilot-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copilot"> <path d="M7.998 15.035c-4.562 0-7.873-2.914-7.998-3.749V9.338c.085-.628.677-1.686 
1.588-2.065.013-.07.024-.143.036-.218.029-.183.06-.384.126-.612-.201-.508-.254-1.084-.254-1.656 0-.87.128-1.769.693-2.484.579-.733 1.494-1.124 2.724-1.261 1.206-.134 2.262.034 2.944.765.05.053.096.108.139.165.044-.057.094-.112.143-.165.682-.731 1.738-.899 2.944-.765 1.23.137 2.145.528 2.724 1.261.566.715.693 1.614.693 2.484 0 .572-.053 1.148-.254 1.656.066.228.098.429.126.612.012.076.024.148.037.218.924.385 1.522 1.471 1.591 2.095v1.872c0 .766-3.351 3.795-8.002 3.795Zm0-1.485c2.28 0 4.584-1.11 5.002-1.433V7.862l-.023-.116c-.49.21-1.075.291-1.727.291-1.146 0-2.059-.327-2.71-.991A3.222 3.222 0 0 1 8 6.303a3.24 3.24 0 0 1-.544.743c-.65.664-1.563.991-2.71.991-.652 0-1.236-.081-1.727-.291l-.023.116v4.255c.419.323 2.722 1.433 5.002 1.433ZM6.762 2.83c-.193-.206-.637-.413-1.682-.297-1.019.113-1.479.404-1.713.7-.247.312-.369.789-.369 1.554 0 .793.129 1.171.308 1.371.162.181.519.379 1.442.379.853 0 1.339-.235 1.638-.54.315-.322.527-.827.617-1.553.117-.935-.037-1.395-.241-1.614Zm4.155-.297c-1.044-.116-1.488.091-1.681.297-.204.219-.359.679-.242 1.614.091.726.303 1.231.618 1.553.299.305.784.54 1.638.54.922 0 1.28-.198 1.442-.379.179-.2.308-.578.308-1.371 0-.765-.123-1.242-.37-1.554-.233-.296-.693-.587-1.713-.7Z"></path><path d="M6.25 9.037a.75.75 0 0 1 .75.75v1.501a.75.75 0 0 1-1.5 0V9.787a.75.75 0 0 1 .75-.75Zm4.25.75v1.501a.75.75 0 0 1-1.5 0V9.787a.75.75 0 0 1 1.5 0Z"></path> </svg> </template> <template id="copilot-error-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copilot-error"> <path d="M16 11.24c0 .112-.072.274-.21.467L13 9.688V7.862l-.023-.116c-.49.21-1.075.291-1.727.291-.198 0-.388-.009-.571-.029L6.833 5.226a4.01 4.01 0 0 0 .17-.782c.117-.935-.037-1.395-.241-1.614-.193-.206-.637-.413-1.682-.297-.683.076-1.115.231-1.395.415l-1.257-.91c.579-.564 1.413-.877 2.485-.996 1.206-.134 2.262.034 2.944.765.05.053.096.108.139.165.044-.057.094-.112.143-.165.682-.731 1.738-.899 2.944-.765 
1.23.137 2.145.528 2.724 1.261.566.715.693 1.614.693 2.484 0 .572-.053 1.148-.254 1.656.066.228.098.429.126.612.012.076.024.148.037.218.924.385 1.522 1.471 1.591 2.095Zm-5.083-8.707c-1.044-.116-1.488.091-1.681.297-.204.219-.359.679-.242 1.614.091.726.303 1.231.618 1.553.299.305.784.54 1.638.54.922 0 1.28-.198 1.442-.379.179-.2.308-.578.308-1.371 0-.765-.123-1.242-.37-1.554-.233-.296-.693-.587-1.713-.7Zm2.511 11.074c-1.393.776-3.272 1.428-5.43 1.428-4.562 0-7.873-2.914-7.998-3.749V9.338c.085-.628.677-1.686 1.588-2.065.013-.07.024-.143.036-.218.029-.183.06-.384.126-.612-.18-.455-.241-.963-.252-1.475L.31 4.107A.747.747 0 0 1 0 3.509V3.49a.748.748 0 0 1 .625-.73c.156-.026.306.047.435.139l14.667 10.578a.592.592 0 0 1 .227.264.752.752 0 0 1 .046.249v.022a.75.75 0 0 1-1.19.596Zm-1.367-.991L5.635 7.964a5.128 5.128 0 0 1-.889.073c-.652 0-1.236-.081-1.727-.291l-.023.116v4.255c.419.323 2.722 1.433 5.002 1.433 1.539 0 3.089-.505 4.063-.934Z"></path> </svg> </template> <template id="workflow-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-workflow"> <path d="M0 1.75C0 .784.784 0 1.75 0h3.5C6.216 0 7 .784 7 1.75v3.5A1.75 1.75 0 0 1 5.25 7H4v4a1 1 0 0 0 1 1h4v-1.25C9 9.784 9.784 9 10.75 9h3.5c.966 0 1.75.784 1.75 1.75v3.5A1.75 1.75 0 0 1 14.25 16h-3.5A1.75 1.75 0 0 1 9 14.25v-.75H5A2.5 2.5 0 0 1 2.5 11V7h-.75A1.75 1.75 0 0 1 0 5.25Zm1.75-.25a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Zm9 9a.25.25 0 0 0-.25.25v3.5c0 .138.112.25.25.25h3.5a.25.25 0 0 0 .25-.25v-3.5a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="book-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 
0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> </template> <template id="code-review-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code-review"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 13H8.061l-2.574 2.573A1.458 1.458 0 0 1 3 14.543V13H1.75A1.75 1.75 0 0 1 0 11.25v-8.5C0 1.784.784 1 1.75 1ZM1.5 2.75v8.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h6.5a.25.25 0 0 0 .25-.25v-8.5a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm5.28 1.72a.75.75 0 0 1 0 1.06L5.31 7l1.47 1.47a.751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018l-2-2a.75.75 0 0 1 0-1.06l2-2a.75.75 0 0 1 1.06 0Zm2.44 0a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L10.69 7 9.22 5.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </template> <template id="codespaces-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-codespaces"> <path d="M0 11.25c0-.966.784-1.75 1.75-1.75h12.5c.966 0 1.75.784 1.75 1.75v3A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm2-9.5C2 .784 2.784 0 3.75 0h8.5C13.216 0 14 .784 14 1.75v5a1.75 1.75 0 0 1-1.75 1.75h-8.5A1.75 1.75 0 0 1 2 6.75Zm1.75-.25a.25.25 0 0 0-.25.25v5c0 .138.112.25.25.25h8.5a.25.25 0 0 0 .25-.25v-5a.25.25 0 0 0-.25-.25Zm-2 9.5a.25.25 0 0 0-.25.25v3c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-3a.25.25 0 0 0-.25-.25Z"></path><path d="M7 12.75a.75.75 0 0 1 .75-.75h4.5a.75.75 0 0 1 0 1.5h-4.5a.75.75 0 0 1-.75-.75Zm-4 0a.75.75 0 0 1 .75-.75h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1-.75-.75Z"></path> </svg> </template> 
<template id="comment-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment"> <path d="M1 2.75C1 1.784 1.784 1 2.75 1h10.5c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 13.25 12H9.06l-2.573 2.573A1.458 1.458 0 0 1 4 13.543V12H2.75A1.75 1.75 0 0 1 1 10.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h2a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h4.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> </template> <template id="comment-discussion-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> </template> <template id="organization-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-organization"> <path d="M1.75 16A1.75 1.75 0 0 1 0 14.25V1.75C0 .784.784 0 1.75 0h8.5C11.216 0 12 .784 12 1.75v12.5c0 .085-.006.168-.018.25h2.268a.25.25 0 0 0 .25-.25V8.285a.25.25 0 0 0-.111-.208l-1.055-.703a.749.749 0 1 1 .832-1.248l1.055.703c.487.325.779.871.779 1.456v5.965A1.75 1.75 0 0 1 14.25 16h-3.5a.766.766 0 0 1-.197-.026c-.099.017-.2.026-.303.026h-3a.75.75 0 0 1-.75-.75V14h-1v1.25a.75.75 0 0 1-.75.75Zm-.25-1.75c0 
.138.112.25.25.25H4v-1.25a.75.75 0 0 1 .75-.75h2.5a.75.75 0 0 1 .75.75v1.25h2.25a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25ZM3.75 6h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5ZM3 3.75A.75.75 0 0 1 3.75 3h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 3.75Zm4 3A.75.75 0 0 1 7.75 6h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 7 6.75ZM7.75 3h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5ZM3 9.75A.75.75 0 0 1 3.75 9h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 9.75ZM7.75 9h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5Z"></path> </svg> </template> <template id="rocket-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-rocket"> <path d="M14.064 0h.186C15.216 0 16 .784 16 1.75v.186a8.752 8.752 0 0 1-2.564 6.186l-.458.459c-.314.314-.641.616-.979.904v3.207c0 .608-.315 1.172-.833 1.49l-2.774 1.707a.749.749 0 0 1-1.11-.418l-.954-3.102a1.214 1.214 0 0 1-.145-.125L3.754 9.816a1.218 1.218 0 0 1-.124-.145L.528 8.717a.749.749 0 0 1-.418-1.11l1.71-2.774A1.748 1.748 0 0 1 3.31 4h3.204c.288-.338.59-.665.904-.979l.459-.458A8.749 8.749 0 0 1 14.064 0ZM8.938 3.623h-.002l-.458.458c-.76.76-1.437 1.598-2.02 2.5l-1.5 2.317 2.143 2.143 2.317-1.5c.902-.583 1.74-1.26 2.499-2.02l.459-.458a7.25 7.25 0 0 0 2.123-5.127V1.75a.25.25 0 0 0-.25-.25h-.186a7.249 7.249 0 0 0-5.125 2.123ZM3.56 14.56c-.732.732-2.334 1.045-3.005 1.148a.234.234 0 0 1-.201-.064.234.234 0 0 1-.064-.201c.103-.671.416-2.273 1.15-3.003a1.502 1.502 0 1 1 2.12 2.12Zm6.94-3.935c-.088.06-.177.118-.266.175l-2.35 1.521.548 1.783 1.949-1.2a.25.25 0 0 0 .119-.213ZM3.678 8.116 5.2 5.766c.058-.09.117-.178.176-.266H3.309a.25.25 0 0 0-.213.119l-1.2 1.95ZM12 5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> </template> <template id="shield-check-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield-check"> <path d="m8.533.133 5.25 1.68A1.75 1.75 0 0 1 15 
3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667l5.25-1.68a1.748 1.748 0 0 1 1.066 0Zm-.61 1.429.001.001-5.25 1.68a.251.251 0 0 0-.174.237V7c0 1.36.275 2.666 1.057 3.859.784 1.194 2.121 2.342 4.366 3.298a.196.196 0 0 0 .154 0c2.245-.957 3.582-2.103 4.366-3.297C13.225 9.666 13.5 8.358 13.5 7V3.48a.25.25 0 0 0-.174-.238l-5.25-1.68a.25.25 0 0 0-.153 0ZM11.28 6.28l-3.5 3.5a.75.75 0 0 1-1.06 0l-1.5-1.5a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l.97.97 2.97-2.97a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </template> <template id="heart-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-heart"> <path d="m8 14.25.345.666a.75.75 0 0 1-.69 0l-.008-.004-.018-.01a7.152 7.152 0 0 1-.31-.17 22.055 22.055 0 0 1-3.434-2.414C2.045 10.731 0 8.35 0 5.5 0 2.836 2.086 1 4.25 1 5.797 1 7.153 1.802 8 3.02 8.847 1.802 10.203 1 11.75 1 13.914 1 16 2.836 16 5.5c0 2.85-2.045 5.231-3.885 6.818a22.066 22.066 0 0 1-3.744 2.584l-.018.01-.006.003h-.002ZM4.25 2.5c-1.336 0-2.75 1.164-2.75 3 0 2.15 1.58 4.144 3.365 5.682A20.58 20.58 0 0 0 8 13.393a20.58 20.58 0 0 0 3.135-2.211C12.92 9.644 14.5 7.65 14.5 5.5c0-1.836-1.414-3-2.75-3-1.373 0-2.609.986-3.029 2.456a.749.749 0 0 1-1.442 0C6.859 3.486 5.623 2.5 4.25 2.5Z"></path> </svg> </template> <template id="server-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-server"> <path d="M1.75 1h12.5c.966 0 1.75.784 1.75 1.75v4c0 .372-.116.717-.314 1 .198.283.314.628.314 1v4a1.75 1.75 0 0 1-1.75 1.75H1.75A1.75 1.75 0 0 1 0 12.75v-4c0-.358.109-.707.314-1a1.739 1.739 0 0 1-.314-1v-4C0 1.784.784 1 1.75 1ZM1.5 2.75v4c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25Zm.25 
5.75a.25.25 0 0 0-.25.25v4c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-4a.25.25 0 0 0-.25-.25ZM7 4.75A.75.75 0 0 1 7.75 4h4.5a.75.75 0 0 1 0 1.5h-4.5A.75.75 0 0 1 7 4.75ZM7.75 10h4.5a.75.75 0 0 1 0 1.5h-4.5a.75.75 0 0 1 0-1.5ZM3 4.75A.75.75 0 0 1 3.75 4h.5a.75.75 0 0 1 0 1.5h-.5A.75.75 0 0 1 3 4.75ZM3.75 10h.5a.75.75 0 0 1 0 1.5h-.5a.75.75 0 0 1 0-1.5Z"></path> </svg> </template> <template id="globe-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-globe"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM5.78 8.75a9.64 9.64 0 0 0 1.363 4.177c.255.426.542.832.857 1.215.245-.296.551-.705.857-1.215A9.64 9.64 0 0 0 10.22 8.75Zm4.44-1.5a9.64 9.64 0 0 0-1.363-4.177c-.307-.51-.612-.919-.857-1.215a9.927 9.927 0 0 0-.857 1.215A9.64 9.64 0 0 0 5.78 7.25Zm-5.944 1.5H1.543a6.507 6.507 0 0 0 4.666 5.5c-.123-.181-.24-.365-.352-.552-.715-1.192-1.437-2.874-1.581-4.948Zm-2.733-1.5h2.733c.144-2.074.866-3.756 1.58-4.948.12-.197.237-.381.353-.552a6.507 6.507 0 0 0-4.666 5.5Zm10.181 1.5c-.144 2.074-.866 3.756-1.58 4.948-.12.197-.237.381-.353.552a6.507 6.507 0 0 0 4.666-5.5Zm2.733-1.5a6.507 6.507 0 0 0-4.666-5.5c.123.181.24.365.353.552.714 1.192 1.436 2.874 1.58 4.948Z"></path> </svg> </template> <template id="issue-opened-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> </template> <template id="device-mobile-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-device-mobile"> <path d="M3.75 0h8.5C13.216 0 14 .784 14 1.75v12.5A1.75 1.75 0 0 1 12.25 16h-8.5A1.75 1.75 0 0 1 2 14.25V1.75C2 .784 2.784 0 3.75 0ZM3.5 1.75v12.5c0 .138.112.25.25.25h8.5a.25.25 0 
0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25ZM8 13a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"></path> </svg> </template> <template id="package-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-package"> <path d="m8.878.392 5.25 3.045c.54.314.872.89.872 1.514v6.098a1.75 1.75 0 0 1-.872 1.514l-5.25 3.045a1.75 1.75 0 0 1-1.756 0l-5.25-3.045A1.75 1.75 0 0 1 1 11.049V4.951c0-.624.332-1.201.872-1.514L7.122.392a1.75 1.75 0 0 1 1.756 0ZM7.875 1.69l-4.63 2.685L8 7.133l4.755-2.758-4.63-2.685a.248.248 0 0 0-.25 0ZM2.5 5.677v5.372c0 .09.047.171.125.216l4.625 2.683V8.432Zm6.25 8.271 4.625-2.683a.25.25 0 0 0 .125-.216V5.677L8.75 8.432Z"></path> </svg> </template> <template id="credit-card-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-credit-card"> <path d="M10.75 9a.75.75 0 0 0 0 1.5h1.5a.75.75 0 0 0 0-1.5h-1.5Z"></path><path d="M0 3.75C0 2.784.784 2 1.75 2h12.5c.966 0 1.75.784 1.75 1.75v8.5A1.75 1.75 0 0 1 14.25 14H1.75A1.75 1.75 0 0 1 0 12.25ZM14.5 6.5h-13v5.75c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25Zm0-2.75a.25.25 0 0 0-.25-.25H1.75a.25.25 0 0 0-.25.25V5h13Z"></path> </svg> </template> <template id="play-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> </template> <template id="gift-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-gift"> <path d="M2 2.75A2.75 2.75 0 0 1 4.75 0c.983 0 1.873.42 2.57 1.232.268.318.497.668.68 1.042.183-.375.411-.725.68-1.044C9.376.42 10.266 0 11.25 0a2.75 2.75 0 0 1 
2.45 4h.55c.966 0 1.75.784 1.75 1.75v2c0 .698-.409 1.301-1 1.582v4.918A1.75 1.75 0 0 1 13.25 16H2.75A1.75 1.75 0 0 1 1 14.25V9.332C.409 9.05 0 8.448 0 7.75v-2C0 4.784.784 4 1.75 4h.55c-.192-.375-.3-.8-.3-1.25ZM7.25 9.5H2.5v4.75c0 .138.112.25.25.25h4.5Zm1.5 0v5h4.5a.25.25 0 0 0 .25-.25V9.5Zm0-4V8h5.5a.25.25 0 0 0 .25-.25v-2a.25.25 0 0 0-.25-.25Zm-7 0a.25.25 0 0 0-.25.25v2c0 .138.112.25.25.25h5.5V5.5h-5.5Zm3-4a1.25 1.25 0 0 0 0 2.5h2.309c-.233-.818-.542-1.401-.878-1.793-.43-.502-.915-.707-1.431-.707ZM8.941 4h2.309a1.25 1.25 0 0 0 0-2.5c-.516 0-1 .205-1.43.707-.337.392-.646.975-.879 1.793Z"></path> </svg> </template> <template id="code-square-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code-square"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25V1.75a.25.25 0 0 0-.25-.25Zm7.47 3.97a.75.75 0 0 1 1.06 0l2 2a.75.75 0 0 1 0 1.06l-2 2a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L10.69 8 9.22 6.53a.75.75 0 0 1 0-1.06ZM6.78 6.53 5.31 8l1.47 1.47a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215l-2-2a.75.75 0 0 1 0-1.06l2-2a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </template> <template id="device-desktop-icon"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-device-desktop"> <path d="M14.25 1c.966 0 1.75.784 1.75 1.75v7.5A1.75 1.75 0 0 1 14.25 12h-3.727c.099 1.041.52 1.872 1.292 2.757A.752.752 0 0 1 11.25 16h-6.5a.75.75 0 0 1-.565-1.243c.772-.885 1.192-1.716 1.292-2.757H1.75A1.75 1.75 0 0 1 0 10.25v-7.5C0 1.784.784 1 1.75 1ZM1.75 2.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h12.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25ZM9.018 12H6.982a5.72 5.72 0 0 1-.765 2.5h3.566a5.72 5.72 0 0 1-.765-2.5Z"></path> </svg> 
</template> <div class="position-relative"> <ul role="listbox" class="ActionListWrap QueryBuilder-ListWrap" aria-label="Suggestions" data-action=" combobox-commit:query-builder#comboboxCommit mousedown:query-builder#resultsMousedown " data-target="query-builder.resultsList" data-persist-list=false id="query-builder-test-results" ></ul> </div> <div class="FormControl-inlineValidation" id="validation-e49bee3b-273b-45f0-a2b8-ec1a3c1520a1" hidden="hidden"> <span class="FormControl-inlineValidation--visual"> <svg aria-hidden="true" height="12" viewBox="0 0 12 12" version="1.1" width="12" data-view-component="true" class="octicon octicon-alert-fill"> <path d="M4.855.708c.5-.896 1.79-.896 2.29 0l4.675 8.351a1.312 1.312 0 0 1-1.146 1.954H1.33A1.313 1.313 0 0 1 .183 9.058ZM7 7V3H5v4Zm-1 3a1 1 0 1 0 0-2 1 1 0 0 0 0 2Z"></path> </svg> </span> <span></span> </div> </div> <div data-target="query-builder.screenReaderFeedback" aria-live="polite" aria-atomic="true" class="sr-only"></div> </query-builder></form> <div class="d-flex flex-row color-fg-muted px-3 text-small color-bg-default search-feedback-prompt"> <a target="_blank" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax" data-view-component="true" class="Link color-fg-accent text-normal ml-2">Search syntax tips</a> <div class="d-flex flex-1"></div> </div> </div> </div> </div> </modal-dialog></div> </div> <div data-action="click:qbsearch-input#retract" class="dark-backdrop position-fixed" hidden data-target="qbsearch-input.darkBackdrop"></div> <div class="color-fg-default"> <dialog-helper> <dialog data-target="qbsearch-input.feedbackDialog" data-action="close:qbsearch-input#handleDialogClose cancel:qbsearch-input#handleDialogClose" id="feedback-dialog" aria-modal="true" aria-labelledby="feedback-dialog-title" aria-describedby="feedback-dialog-description" data-view-component="true" class="Overlay Overlay-whenNarrow Overlay--size-medium Overlay--motion-scaleFade 
Overlay--disableScroll"> <div data-view-component="true" class="Overlay-header"> <div class="Overlay-headerContentWrap"> <div class="Overlay-titleWrap"> <h1 class="Overlay-title " id="feedback-dialog-title"> Provide feedback </h1> </div> <div class="Overlay-actionWrap"> <button data-close-dialog-id="feedback-dialog" aria-label="Close" type="button" data-view-component="true" class="close-button Overlay-closeButton"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg></button> </div> </div> </div> <scrollable-region data-labelled-by="feedback-dialog-title"> <div data-view-component="true" class="Overlay-body"> <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="code-search-feedback-form" data-turbo="false" action="/search/feedback" accept-charset="UTF-8" method="post"><input type="hidden" data-csrf="true" name="authenticity_token" value="6Pul1QlEARfotxvQ5IM7kU888CFmUKE4/gQL1JJ5CeDbQ7YJu27gO0KnsgYpobNBwNg7b+oPTDprR3GZffsXog==" /> <p>We read every piece of feedback, and take your input very seriously.</p> <textarea name="feedback" class="form-control width-full mb-2" style="height: 120px" id="feedback"></textarea> <input name="include_email" id="include_email" aria-label="Include my email address so I can be contacted" class="form-control mr-2" type="checkbox"> <label for="include_email" style="font-weight: normal">Include my email address so I can be contacted</label> </form></div> </scrollable-region> <div data-view-component="true" class="Overlay-footer Overlay-footer--alignEnd"> <button data-close-dialog-id="feedback-dialog" type="button" data-view-component="true" class="btn"> 
Cancel </button> <button form="code-search-feedback-form" data-action="click:qbsearch-input#submitFeedback" type="submit" data-view-component="true" class="btn-primary btn"> Submit feedback </button> </div> </dialog></dialog-helper> <custom-scopes data-target="qbsearch-input.customScopesManager"> <dialog-helper> <dialog data-target="custom-scopes.customScopesModalDialog" data-action="close:qbsearch-input#handleDialogClose cancel:qbsearch-input#handleDialogClose" id="custom-scopes-dialog" aria-modal="true" aria-labelledby="custom-scopes-dialog-title" aria-describedby="custom-scopes-dialog-description" data-view-component="true" class="Overlay Overlay-whenNarrow Overlay--size-medium Overlay--motion-scaleFade Overlay--disableScroll"> <div data-view-component="true" class="Overlay-header Overlay-header--divided"> <div class="Overlay-headerContentWrap"> <div class="Overlay-titleWrap"> <h1 class="Overlay-title " id="custom-scopes-dialog-title"> Saved searches </h1> <h2 id="custom-scopes-dialog-description" class="Overlay-description">Use saved searches to filter your results more quickly</h2> </div> <div class="Overlay-actionWrap"> <button data-close-dialog-id="custom-scopes-dialog" aria-label="Close" type="button" data-view-component="true" class="close-button Overlay-closeButton"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg></button> </div> </div> </div> <scrollable-region data-labelled-by="custom-scopes-dialog-title"> <div data-view-component="true" class="Overlay-body"> <div data-target="custom-scopes.customScopesModalDialogFlash"></div> <div hidden class="create-custom-scope-form" 
data-target="custom-scopes.createCustomScopeForm"> <!-- '"` --><!-- </textarea></xmp> --></option></form><form id="custom-scopes-dialog-form" data-turbo="false" action="/search/custom_scopes" accept-charset="UTF-8" method="post"><input type="hidden" data-csrf="true" name="authenticity_token" value="L60GvKxF7hOsYKEPIH6oBhjGA1kHT28w91mhLbaOVKFhNzvgAxMf6OXMyJmRfI2IusRyKQHSpfKSGsn767usGw==" /> <div data-target="custom-scopes.customScopesModalDialogFlash"></div> <input type="hidden" id="custom_scope_id" name="custom_scope_id" data-target="custom-scopes.customScopesIdField"> <div class="form-group"> <label for="custom_scope_name">Name</label> <auto-check src="/search/custom_scopes/check_name" required only-validate-on-blur="false"> <input type="text" name="custom_scope_name" id="custom_scope_name" data-target="custom-scopes.customScopesNameField" class="form-control" autocomplete="off" placeholder="github-ruby" required maxlength="50"> <input type="hidden" data-csrf="true" value="0HCxxZ+2YSZHFXdBJZhGu6GUv50Z98dpC2/B0bKqy+/0BTZ4zgWxhc47z0Jlvw6ZcCgR6M0Jjyh7Y3XlbM5G4w==" /> </auto-check> </div> <div class="form-group"> <label for="custom_scope_query">Query</label> <input type="text" name="custom_scope_query" id="custom_scope_query" data-target="custom-scopes.customScopesQueryField" class="form-control" autocomplete="off" placeholder="(repo:mona/a OR repo:mona/b) AND lang:python" required maxlength="500"> </div> <p class="text-small color-fg-muted"> To see all available qualifiers, see our <a class="Link--inTextBlock" href="https://docs.github.com/search-github/github-code-search/understanding-github-code-search-syntax">documentation</a>. 
</p> </form> </div> <div data-target="custom-scopes.manageCustomScopesForm"> <div data-target="custom-scopes.list"></div> </div> </div> </scrollable-region> <div data-view-component="true" class="Overlay-footer Overlay-footer--alignEnd Overlay-footer--divided"> <button data-action="click:custom-scopes#customScopesCancel" type="button" data-view-component="true" class="btn"> Cancel </button> <button form="custom-scopes-dialog-form" data-action="click:custom-scopes#customScopesSubmit" data-target="custom-scopes.customScopesSubmitButton" type="submit" data-view-component="true" class="btn-primary btn"> Create saved search </button> </div> </dialog></dialog-helper> </custom-scopes> </div> </qbsearch-input> <div class="position-relative HeaderMenu-link-wrap d-lg-inline-block"> <a href="/login?return_to=https%3A%2F%2Fgithub.com%2FJasonKessler%2Fscattertext" class="HeaderMenu-link HeaderMenu-link--sign-in HeaderMenu-button flex-shrink-0 no-underline d-none d-lg-inline-flex border border-lg-0 rounded rounded-lg-0 px-2 py-1" style="margin-left: 12px;" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="74ad727fdaab4f5d669815d6a8739fbf095b6abcba8c6cb0ca4d41a07c410f4c" data-analytics-event="{&quot;category&quot;:&quot;Marketing nav&quot;,&quot;action&quot;:&quot;click to go to homepage&quot;,&quot;label&quot;:&quot;ref_page:Marketing;ref_cta:Sign in;ref_loc:Header&quot;}" > Sign in </a> </div> <a href="/signup?ref_cta=Sign+up&amp;ref_loc=header+logged+out&amp;ref_page=%2F%3Cuser-name%3E%2F%3Crepo-name%3E&amp;source=header-repo&amp;source_repo=JasonKessler%2Fscattertext" class="HeaderMenu-link HeaderMenu-link--sign-up HeaderMenu-button flex-shrink-0 d-flex d-lg-inline-flex 
no-underline border color-border-default rounded px-2 py-1" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;site header menu&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;SIGN_UP&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="74ad727fdaab4f5d669815d6a8739fbf095b6abcba8c6cb0ca4d41a07c410f4c" data-analytics-event="{&quot;category&quot;:&quot;Sign up&quot;,&quot;action&quot;:&quot;click to sign up for account&quot;,&quot;label&quot;:&quot;ref_page:/&lt;user-name&gt;/&lt;repo-name&gt;;ref_cta:Sign up;ref_loc:header logged out&quot;}" > Sign up </a> <button type="button" class="sr-only js-header-menu-focus-trap d-block d-lg-none">Resetting focus</button> </div> </div> </div> </div> </header> <div hidden="hidden" data-view-component="true" class="js-stale-session-flash stale-session-flash flash flash-warn flash-full"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-alert"> <path d="M6.457 1.047c.659-1.234 2.427-1.234 3.086 0l6.082 11.378A1.75 1.75 0 0 1 14.082 15H1.918a1.75 1.75 0 0 1-1.543-2.575Zm1.763.707a.25.25 0 0 0-.44 0L1.698 13.132a.25.25 0 0 0 .22.368h12.164a.25.25 0 0 0 .22-.368Zm.53 3.996v2.5a.75.75 0 0 1-1.5 0v-2.5a.75.75 0 0 1 1.5 0ZM9 11a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <span class="js-stale-session-flash-signed-in" hidden>You signed in with another tab or window. <a class="Link--inTextBlock" href="">Reload</a> to refresh your session.</span> <span class="js-stale-session-flash-signed-out" hidden>You signed out in another tab or window. <a class="Link--inTextBlock" href="">Reload</a> to refresh your session.</span> <span class="js-stale-session-flash-switched" hidden>You switched accounts on another tab or window.
<a class="Link--inTextBlock" href="">Reload</a> to refresh your session.</span> <button id="icon-button-fee7f5b6-eec6-4079-a302-e31c1b4edf2c" aria-labelledby="tooltip-171404a3-7601-4c0a-a9f6-7fdac425dea3" type="button" data-view-component="true" class="Button Button--iconOnly Button--invisible Button--medium flash-close js-flash-close"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x Button-visual"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button><tool-tip id="tooltip-171404a3-7601-4c0a-a9f6-7fdac425dea3" for="icon-button-fee7f5b6-eec6-4079-a302-e31c1b4edf2c" popover="manual" data-direction="s" data-type="label" data-view-component="true" class="sr-only position-absolute">Dismiss alert</tool-tip> </div> </div> <div id="start-of-content" class="show-on-focus"></div> <div id="js-flash-container" class="flash-container" data-turbo-replace> <template class="js-flash-template"> <div class="flash flash-full {{ className }}"> <div > <button autofocus class="flash-close js-flash-close" type="button" aria-label="Dismiss this message"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> <div aria-atomic="true" role="alert" class="js-flash-alert"> <div>{{ message }}</div> </div> </div> </div> </template> </div> <div class="application-main " 
data-commit-hovercards-enabled data-discussion-hovercards-enabled data-issue-and-pr-hovercards-enabled data-project-hovercards-enabled > <div itemscope itemtype="http://schema.org/SoftwareSourceCode" class=""> <main id="js-repo-pjax-container" > <div id="repository-container-header" class="pt-3 hide-full-screen" style="background-color: var(--page-header-bgColor, var(--color-page-header-bg));" data-turbo-replace> <div class="d-flex flex-nowrap flex-justify-end mb-3 px-3 px-lg-5" style="gap: 1rem;"> <div class="flex-auto min-width-0 width-fit"> <div class=" d-flex flex-wrap flex-items-center wb-break-word f3 text-normal"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo color-fg-muted mr-2"> <path d="M2 2.5A2.5 2.5 0 0 1 4.5 0h8.75a.75.75 0 0 1 .75.75v12.5a.75.75 0 0 1-.75.75h-2.5a.75.75 0 0 1 0-1.5h1.75v-2h-8a1 1 0 0 0-.714 1.7.75.75 0 1 1-1.072 1.05A2.495 2.495 0 0 1 2 11.5Zm10.5-1h-8a1 1 0 0 0-1 1v6.708A2.486 2.486 0 0 1 4.5 9h8ZM5 12.25a.25.25 0 0 1 .25-.25h3.5a.25.25 0 0 1 .25.25v3.25a.25.25 0 0 1-.4.2l-1.45-1.087a.249.249 0 0 0-.3 0L5.4 15.7a.25.25 0 0 1-.4-.2Z"></path> </svg> <span class="author flex-self-stretch" itemprop="author"> <a class="url fn" rel="author" data-hovercard-type="user" data-hovercard-url="/users/JasonKessler/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" href="/JasonKessler"> JasonKessler </a> </span> <span class="mx-1 flex-self-stretch color-fg-muted">/</span> <strong itemprop="name" class="mr-2 flex-self-stretch"> <a data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" href="/JasonKessler/scattertext">scattertext</a> </strong> <span></span><span class="Label Label--secondary v-align-middle mr-1">Public</span> </div> </div> <div id="repository-details-container" class="flex-shrink-0" data-turbo-replace style="max-width: 70%;"> <ul class="pagehead-actions flex-shrink-0 d-none 
d-md-inline" style="padding: 2px 0;"> <li> <a href="/login?return_to=%2FJasonKessler%2Fscattertext" rel="nofollow" id="repository-details-watch-button" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;notification subscription menu watch&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="dda0750954d6cb309a5f6a85c4054779f767a0fecee74d1a0cd0a85491b2397a" aria-label="You must be signed in to change notification settings" data-view-component="true" class="btn-sm btn"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bell mr-2"> <path d="M8 16a2 2 0 0 0 1.985-1.75c.017-.137-.097-.25-.235-.25h-3.5c-.138 0-.252.113-.235.25A2 2 0 0 0 8 16ZM3 5a5 5 0 0 1 10 0v2.947c0 .05.015.098.042.139l1.703 2.555A1.519 1.519 0 0 1 13.482 13H2.518a1.516 1.516 0 0 1-1.263-2.36l1.703-2.554A.255.255 0 0 0 3 7.947Zm5-3.5A3.5 3.5 0 0 0 4.5 5v2.947c0 .346-.102.683-.294.97l-1.703 2.556a.017.017 0 0 0-.003.01l.001.006c0 .002.002.004.004.006l.006.004.007.001h10.964l.007-.001.006-.004.004-.006.001-.007a.017.017 0 0 0-.003-.01l-1.703-2.554a1.745 1.745 0 0 1-.294-.97V5A3.5 3.5 0 0 0 8 1.5Z"></path> </svg>Notifications </a> <tool-tip id="tooltip-940f68ce-5634-4318-adc6-39d52c69d942" for="repository-details-watch-button" popover="manual" data-direction="s" data-type="description" data-view-component="true" class="sr-only position-absolute">You must be signed in to change notification settings</tool-tip> </li> <li> <a icon="repo-forked" id="fork-button" href="/login?return_to=%2FJasonKessler%2Fscattertext" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;repo details fork 
button&quot;,&quot;repository_id&quot;:63827736,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="53cfbd25239d458e1a4689d18c7e252161515f7605cbbce17339613a7b2e1956" data-view-component="true" class="btn-sm btn"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-2"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg>Fork <span id="repo-network-counter" data-pjax-replace="true" data-turbo-replace="true" title="292" data-view-component="true" class="Counter">292</span> </a> </li> <li> <div data-view-component="true" class="BtnGroup d-flex"> <a href="/login?return_to=%2FJasonKessler%2Fscattertext" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;star button&quot;,&quot;repository_id&quot;:63827736,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="c6dc45ef6d5762b1fb5c1048516867f94d725ec44b80a6f4c2c1b7d8d684a9e1" aria-label="You must be signed in to star a repository" data-view-component="true" class="tooltipped tooltipped-sw btn-sm btn"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star v-align-text-bottom d-inline-block mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 
4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg><span data-view-component="true" class="d-inline"> Star </span> <span id="repo-stars-counter-star" aria-label="2288 users starred this repository" data-singular-suffix="user starred this repository" data-plural-suffix="users starred this repository" data-turbo-replace="true" title="2,288" data-view-component="true" class="Counter js-social-count">2.3k</span> </a></div> </li> </ul> </div> </div> <div id="responsive-meta-container" data-turbo-replace> <div class="d-block d-md-none mb-2 px-3 px-md-4 px-lg-5"> <p class="f4 mb-3 "> Beautiful visualizations of how language differs among document types. </p> <h3 class="sr-only">License</h3> <div class="mb-2"> <a href="/JasonKessler/scattertext/blob/master/LICENSE" class="Link--muted" data-analytics-event="{&quot;category&quot;:&quot;Repository Overview&quot;,&quot;action&quot;:&quot;click&quot;,&quot;label&quot;:&quot;location:sidebar;file:license&quot;}" > <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-law mr-2"> <path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 
0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path> </svg> Apache-2.0 license </a> </div> <div class="mb-3"> <a class="Link--secondary no-underline mr-3" href="/JasonKessler/scattertext/stargazers"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star mr-1"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg> <span class="text-bold">2.3k</span> stars </a> <a class="Link--secondary no-underline mr-3" href="/JasonKessler/scattertext/forks"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-1"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 
0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg> <span class="text-bold">292</span> forks </a> <a class="Link--secondary no-underline mr-3 d-inline-block" href="/JasonKessler/scattertext/branches"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-branch mr-1"> <path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path> </svg> <span>Branches</span> </a> <a class="Link--secondary no-underline d-inline-block" href="/JasonKessler/scattertext/tags"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-tag mr-1"> <path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path> </svg> <span>Tags</span> </a> <a class="Link--secondary no-underline d-inline-block" href="/JasonKessler/scattertext/activity"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pulse mr-1"> <path d="M6 2c.306 0 .582.187.696.471L10 10.731l1.304-3.26A.751.751 0 0 1 12 7h3.25a.75.75 0 0 1 0 1.5h-2.742l-1.812 4.528a.751.751 0 0 1-1.392 0L6 4.77 4.696 8.03A.75.75 0 0 1 4 8.5H.75a.75.75 0 0 1 0-1.5h2.742l1.812-4.529A.751.751 0 0 1 6 2Z"></path> </svg> <span>Activity</span> </a> </div> <div class="d-flex flex-wrap gap-2"> 
<div class="flex-1"> <div data-view-component="true" class="BtnGroup d-flex"> <a href="/login?return_to=%2FJasonKessler%2Fscattertext" rel="nofollow" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;star button&quot;,&quot;repository_id&quot;:63827736,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" data-hydro-click-hmac="c6dc45ef6d5762b1fb5c1048516867f94d725ec44b80a6f4c2c1b7d8d684a9e1" aria-label="You must be signed in to star a repository" data-view-component="true" class="tooltipped tooltipped-sw btn-sm btn btn-block"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star v-align-text-bottom d-inline-block mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg><span data-view-component="true" class="d-inline"> Star </span> </a></div> </div> <div class="flex-1"> <a href="/login?return_to=%2FJasonKessler%2Fscattertext" rel="nofollow" id="files-overview-watch-button" data-hydro-click="{&quot;event_type&quot;:&quot;authentication.click&quot;,&quot;payload&quot;:{&quot;location_in_page&quot;:&quot;notification subscription menu watch&quot;,&quot;repository_id&quot;:null,&quot;auth_type&quot;:&quot;LOG_IN&quot;,&quot;originating_url&quot;:&quot;https://github.com/JasonKessler/scattertext&quot;,&quot;user_id&quot;:null}}" 
data-hydro-click-hmac="dda0750954d6cb309a5f6a85c4054779f767a0fecee74d1a0cd0a85491b2397a" aria-label="You must be signed in to change notification settings" data-view-component="true" class="btn-sm btn btn-block"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-bell mr-2"> <path d="M8 16a2 2 0 0 0 1.985-1.75c.017-.137-.097-.25-.235-.25h-3.5c-.138 0-.252.113-.235.25A2 2 0 0 0 8 16ZM3 5a5 5 0 0 1 10 0v2.947c0 .05.015.098.042.139l1.703 2.555A1.519 1.519 0 0 1 13.482 13H2.518a1.516 1.516 0 0 1-1.263-2.36l1.703-2.554A.255.255 0 0 0 3 7.947Zm5-3.5A3.5 3.5 0 0 0 4.5 5v2.947c0 .346-.102.683-.294.97l-1.703 2.556a.017.017 0 0 0-.003.01l.001.006c0 .002.002.004.004.006l.006.004.007.001h10.964l.007-.001.006-.004.004-.006.001-.007a.017.017 0 0 0-.003-.01l-1.703-2.554a1.745 1.745 0 0 1-.294-.97V5A3.5 3.5 0 0 0 8 1.5Z"></path> </svg>Notifications </a> <tool-tip id="tooltip-97cf2538-9ad1-434e-a8e6-352a9f33172f" for="files-overview-watch-button" popover="manual" data-direction="s" data-type="description" data-view-component="true" class="sr-only position-absolute">You must be signed in to change notification settings</tool-tip> </div> <span> </span> </div> </div> </div> <nav data-pjax="#js-repo-pjax-container" aria-label="Repository" data-view-component="true" class="js-repo-nav js-sidenav-container-pjax js-responsive-underlinenav overflow-hidden UnderlineNav px-3 px-md-4 px-lg-5"> <ul data-view-component="true" class="UnderlineNav-body list-style-none"> <li data-view-component="true" class="d-inline-flex"> <a id="code-tab" href="/JasonKessler/scattertext" data-tab-item="i0code-tab" data-selected-links="repo_source repo_downloads repo_commits repo_releases repo_tags repo_branches repo_packages repo_deployments repo_attestations /JasonKessler/scattertext" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g c" 
data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Code&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" aria-current="page" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item selected"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code UnderlineNav-octicon d-none d-sm-inline"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> <span data-content="Code">Code</span> <span id="code-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="issues-tab" href="/JasonKessler/scattertext/issues" data-tab-item="i1issues-tab" data-selected-links="repo_issues repo_labels repo_milestones /JasonKessler/scattertext/issues" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g i" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Issues&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened UnderlineNav-octicon d-none d-sm-inline"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 
0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> <span data-content="Issues">Issues</span> <span id="issues-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="21" data-view-component="true" class="Counter">21</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="pull-requests-tab" href="/JasonKessler/scattertext/pulls" data-tab-item="i2pull-requests-tab" data-selected-links="repo_pulls checks /JasonKessler/scattertext/pulls" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g p" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Pull requests&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-pull-request UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"></path> </svg> <span data-content="Pull requests">Pull requests</span> <span id="pull-requests-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="1" data-view-component="true" class="Counter">1</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="discussions-tab" href="/JasonKessler/scattertext/discussions" data-tab-item="i3discussions-tab" data-selected-links="repo_discussions 
/JasonKessler/scattertext/discussions" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g g" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Discussions&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> <span data-content="Discussions">Discussions</span> <span id="discussions-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="actions-tab" href="/JasonKessler/scattertext/actions" data-tab-item="i4actions-tab" data-selected-links="repo_actions /JasonKessler/scattertext/actions" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g a" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click 
tab&quot;,&quot;label&quot;:&quot;Actions&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play UnderlineNav-octicon d-none d-sm-inline"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> <span data-content="Actions">Actions</span> <span id="actions-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="projects-tab" href="/JasonKessler/scattertext/projects" data-tab-item="i5projects-tab" data-selected-links="repo_projects new_repo_project repo_project /JasonKessler/scattertext/projects" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g b" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Projects&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-table UnderlineNav-octicon d-none d-sm-inline"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25ZM6.5 6.5v8h7.75a.25.25 0 0 0 .25-.25V6.5Zm8-1.5V1.75a.25.25 0 0 0-.25-.25H6.5V5Zm-13 1.5v7.75c0 .138.112.25.25.25H5v-8ZM5 5V1.5H1.75a.25.25 0 0 0-.25.25V5Z"></path> </svg> <span 
data-content="Projects">Projects</span> <span id="projects-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="0" hidden="hidden" data-view-component="true" class="Counter">0</span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="wiki-tab" href="/JasonKessler/scattertext/wiki" data-tab-item="i6wiki-tab" data-selected-links="repo_wiki /JasonKessler/scattertext/wiki" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-hotkey="g w" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Wiki&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book UnderlineNav-octicon d-none d-sm-inline"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> <span data-content="Wiki">Wiki</span> <span id="wiki-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="security-tab" href="/JasonKessler/scattertext/security" data-tab-item="i7security-tab" data-selected-links="security overview alerts policy token_scanning code_scanning /JasonKessler/scattertext/security" data-pjax="#repo-content-pjax-container" 
data-turbo-frame="repo-content-turbo-frame" data-hotkey="g s" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Security&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield UnderlineNav-octicon d-none d-sm-inline"> <path d="M7.467.133a1.748 1.748 0 0 1 1.066 0l5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667Zm.61 1.429a.25.25 0 0 0-.153 0l-5.25 1.68a.25.25 0 0 0-.174.238V7c0 1.358.275 2.666 1.057 3.86.784 1.194 2.121 2.34 4.366 3.297a.196.196 0 0 0 .154 0c2.245-.956 3.582-2.104 4.366-3.298C13.225 9.666 13.5 8.36 13.5 7V3.48a.251.251 0 0 0-.174-.237l-5.25-1.68ZM8.75 4.75v3a.75.75 0 0 1-1.5 0v-3a.75.75 0 0 1 1.5 0ZM9 10.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <span data-content="Security">Security</span> <include-fragment src="/JasonKessler/scattertext/security/overall-count" accept="text/fragment+html"></include-fragment> </a></li> <li data-view-component="true" class="d-inline-flex"> <a id="insights-tab" href="/JasonKessler/scattertext/pulse" data-tab-item="i8insights-tab" data-selected-links="repo_graphs repo_contributors dependency_graph dependabot_updates pulse people community /JasonKessler/scattertext/pulse" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" data-analytics-event="{&quot;category&quot;:&quot;Underline navbar&quot;,&quot;action&quot;:&quot;Click tab&quot;,&quot;label&quot;:&quot;Insights&quot;,&quot;target&quot;:&quot;UNDERLINE_NAV.TAB&quot;}" data-view-component="true" 
class="UnderlineNav-item no-wrap js-responsive-underlinenav-item js-selected-navigation-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-graph UnderlineNav-octicon d-none d-sm-inline"> <path d="M1.5 1.75V13.5h13.75a.75.75 0 0 1 0 1.5H.75a.75.75 0 0 1-.75-.75V1.75a.75.75 0 0 1 1.5 0Zm14.28 2.53-5.25 5.25a.75.75 0 0 1-1.06 0L7 7.06 4.28 9.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.25-3.25a.75.75 0 0 1 1.06 0L10 7.94l4.72-4.72a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> <span data-content="Insights">Insights</span> <span id="insights-repo-tab-count" data-pjax-replace="" data-turbo-replace="" title="Not available" data-view-component="true" class="Counter"></span> </a></li> </ul> <div style="visibility:hidden;" data-view-component="true" class="UnderlineNav-actions js-responsive-underlinenav-overflow position-absolute pr-3 pr-md-4 pr-lg-5 right-0"> <action-menu data-select-variant="none" data-view-component="true"> <focus-group direction="vertical" mnemonics retain> <button id="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-button" popovertarget="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-overlay" aria-controls="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-list" aria-haspopup="true" aria-labelledby="tooltip-686e8374-3840-4124-9dd2-66738a92f2b2" type="button" data-view-component="true" class="Button Button--iconOnly Button--secondary Button--medium UnderlineNav-item"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-kebab-horizontal Button-visual"> <path d="M8 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3ZM1.5 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Zm13 0a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path> </svg> </button><tool-tip id="tooltip-686e8374-3840-4124-9dd2-66738a92f2b2" for="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-button" popover="manual" 
data-direction="s" data-type="label" data-view-component="true" class="sr-only position-absolute">Additional navigation options</tool-tip> <anchored-position data-target="action-menu.overlay" id="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-overlay" anchor="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-button" align="start" side="outside-bottom" anchor-offset="normal" popover="auto" data-view-component="true"> <div data-view-component="true" class="Overlay Overlay--size-auto"> <div data-view-component="true" class="Overlay-body Overlay-body--paddingNone"> <action-list> <div data-view-component="true"> <ul aria-labelledby="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-button" id="action-menu-d9b6b481-ac16-48ed-94c5-4b905ffdebb7-list" role="menu" data-view-component="true" class="ActionListWrap--inset ActionListWrap"> <li hidden="hidden" data-menu-item="i0code-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-9c80acce-0da2-43f4-ba9d-55b406da4a99" href="/JasonKessler/scattertext" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-code"> <path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Code </span> </a> </li> <li hidden="hidden" data-menu-item="i1issues-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" 
id="item-c3538a52-2734-4f15-8aea-b5b2f6df7b84" href="/JasonKessler/scattertext/issues" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-issue-opened"> <path d="M8 9.5a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path><path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Issues </span> </a> </li> <li hidden="hidden" data-menu-item="i2pull-requests-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-f2837bfb-a099-4f68-80e6-6d87a6f1a9b7" href="/JasonKessler/scattertext/pulls" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-git-pull-request"> <path d="M1.5 3.25a2.25 2.25 0 1 1 3 2.122v5.256a2.251 2.251 0 1 1-1.5 0V5.372A2.25 2.25 0 0 1 1.5 3.25Zm5.677-.177L9.573.677A.25.25 0 0 1 10 .854V2.5h1A2.5 2.5 0 0 1 13.5 5v5.628a2.251 2.251 0 1 1-1.5 0V5a1 1 0 0 0-1-1h-1v1.646a.25.25 0 0 1-.427.177L7.177 3.427a.25.25 0 0 1 0-.354ZM3.75 2.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm0 9.5a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Zm8.25.75a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Pull requests </span> </a> </li> <li hidden="hidden" data-menu-item="i3discussions-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-87373943-4588-4ca1-a9a0-7c5c89302ce6" 
href="/JasonKessler/scattertext/discussions" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-comment-discussion"> <path d="M1.75 1h8.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 10.25 10H7.061l-2.574 2.573A1.458 1.458 0 0 1 2 11.543V10h-.25A1.75 1.75 0 0 1 0 8.25v-5.5C0 1.784.784 1 1.75 1ZM1.5 2.75v5.5c0 .138.112.25.25.25h1a.75.75 0 0 1 .75.75v2.19l2.72-2.72a.749.749 0 0 1 .53-.22h3.5a.25.25 0 0 0 .25-.25v-5.5a.25.25 0 0 0-.25-.25h-8.5a.25.25 0 0 0-.25.25Zm13 2a.25.25 0 0 0-.25-.25h-.5a.75.75 0 0 1 0-1.5h.5c.966 0 1.75.784 1.75 1.75v5.5A1.75 1.75 0 0 1 14.25 12H14v1.543a1.458 1.458 0 0 1-2.487 1.03L9.22 12.28a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215l2.22 2.22v-2.19a.75.75 0 0 1 .75-.75h1a.25.25 0 0 0 .25-.25Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Discussions </span> </a> </li> <li hidden="hidden" data-menu-item="i4actions-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-a410a082-24ec-4a68-8cc1-9ab060051f6f" href="/JasonKessler/scattertext/actions" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-play"> <path d="M8 0a8 8 0 1 1 0 16A8 8 0 0 1 8 0ZM1.5 8a6.5 6.5 0 1 0 13 0 6.5 6.5 0 0 0-13 0Zm4.879-2.773 4.264 2.559a.25.25 0 0 1 0 .428l-4.264 2.559A.25.25 0 0 1 6 10.559V5.442a.25.25 0 0 1 .379-.215Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Actions </span> </a> </li> <li hidden="hidden" 
data-menu-item="i5projects-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-6a1cdc70-470a-450f-8059-e9850bd2fa9f" href="/JasonKessler/scattertext/projects" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-table"> <path d="M0 1.75C0 .784.784 0 1.75 0h12.5C15.216 0 16 .784 16 1.75v12.5A1.75 1.75 0 0 1 14.25 16H1.75A1.75 1.75 0 0 1 0 14.25ZM6.5 6.5v8h7.75a.25.25 0 0 0 .25-.25V6.5Zm8-1.5V1.75a.25.25 0 0 0-.25-.25H6.5V5Zm-13 1.5v7.75c0 .138.112.25.25.25H5v-8ZM5 5V1.5H1.75a.25.25 0 0 0-.25.25V5Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Projects </span> </a> </li> <li hidden="hidden" data-menu-item="i6wiki-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-9e69c385-d934-4e4a-9216-ebb92e8bbc36" href="/JasonKessler/scattertext/wiki" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> </span> <span 
data-view-component="true" class="ActionListItem-label"> Wiki </span> </a> </li> <li hidden="hidden" data-menu-item="i7security-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-897ebe03-b952-4dca-aad7-b26645298c49" href="/JasonKessler/scattertext/security" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-shield"> <path d="M7.467.133a1.748 1.748 0 0 1 1.066 0l5.25 1.68A1.75 1.75 0 0 1 15 3.48V7c0 1.566-.32 3.182-1.303 4.682-.983 1.498-2.585 2.813-5.032 3.855a1.697 1.697 0 0 1-1.33 0c-2.447-1.042-4.049-2.357-5.032-3.855C1.32 10.182 1 8.566 1 7V3.48a1.75 1.75 0 0 1 1.217-1.667Zm.61 1.429a.25.25 0 0 0-.153 0l-5.25 1.68a.25.25 0 0 0-.174.238V7c0 1.358.275 2.666 1.057 3.86.784 1.194 2.121 2.34 4.366 3.297a.196.196 0 0 0 .154 0c2.245-.956 3.582-2.104 4.366-3.298C13.225 9.666 13.5 8.36 13.5 7V3.48a.251.251 0 0 0-.174-.237l-5.25-1.68ZM8.75 4.75v3a.75.75 0 0 1-1.5 0v-3a.75.75 0 0 1 1.5 0ZM9 10.5a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Security </span> </a> </li> <li hidden="hidden" data-menu-item="i8insights-tab" data-targets="action-list.items" role="none" data-view-component="true" class="ActionListItem"> <a tabindex="-1" id="item-611f6e0a-758b-4ce6-a20d-2dc5cd95bbbd" href="/JasonKessler/scattertext/pulse" role="menuitem" data-view-component="true" class="ActionListContent ActionListContent--visual16"> <span class="ActionListItem-visual ActionListItem-visual--leading"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-graph"> <path d="M1.5 1.75V13.5h13.75a.75.75 0 0 1 0 1.5H.75a.75.75 0 0 
1-.75-.75V1.75a.75.75 0 0 1 1.5 0Zm14.28 2.53-5.25 5.25a.75.75 0 0 1-1.06 0L7 7.06 4.28 9.78a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042l3.25-3.25a.75.75 0 0 1 1.06 0L10 7.94l4.72-4.72a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042Z"></path> </svg> </span> <span data-view-component="true" class="ActionListItem-label"> Insights </span> </a> </li> </ul> </div></action-list> </div> </div></anchored-position> </focus-group> </action-menu></div> </nav> </div> <turbo-frame id="repo-content-turbo-frame" target="_top" data-turbo-action="advance" class=""> <div id="repo-content-pjax-container" class="repository-content " > <h1 class='sr-only'>JasonKessler/scattertext</h1> <div class="clearfix container-xl px-md-4 px-lg-5 px-3"> <div> <div style="max-width: 100%" data-view-component="true" class="Layout Layout--flowRow-until-md react-repos-overview-margin Layout--sidebarPosition-end Layout--sidebarPosition-flowRow-end"> <div data-view-component="true" class="Layout-main"> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_dompurify_dist_purify_es_mjs-dd1d3ea6a436.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/vendors-node_modules_tanstack_query-core_build_modern_queryObserver_js-node_modules_tanstack_-defd52-843b41414e0e.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_aria-live_aria-live_ts-ui_packages_promise-with-resolvers-polyfill_promise-with-r-17c672-34345cb18aac.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_paths_index_ts-e019c54eb886.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" 
src="https://github.githubassets.com/assets/ui_packages_ref-selector_RefSelector_tsx-7496afc3784d.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_commit-attribution_index_ts-ui_packages_commit-checks-status_index_ts-ui_packages-7094d4-15017f02e61c.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_code-view-shared_hooks_shortcuts_ts-ui_packages_code-view-shared_utilities_styles-0dc246-f8753c5db08d.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/ui_packages_code-view-shared_hooks_use-canonical-object_ts-ui_packages_code-view-shared_hooks-a83ec0-5ee2b562b57f.js"></script> <script crossorigin="anonymous" defer="defer" type="application/javascript" src="https://github.githubassets.com/assets/repos-overview-b4f8b323dc3b.js"></script> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/primer-react.9df1783473f10f02fb62.module.css" /> <link crossorigin="anonymous" media="all" rel="stylesheet" href="https://github.githubassets.com/assets/repos-overview.0ee7cac3ab511a65d9f9.module.css" /> <react-partial partial-name="repos-overview" data-ssr="true" data-attempted-ssr="true" > <script type="application/json" 
data-target="react-partial.embeddedData">{"props":{"initialPayload":{"allShortcutsEnabled":false,"path":"/","repo":{"id":63827736,"defaultBranch":"master","name":"scattertext","ownerLogin":"JasonKessler","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2016-07-21T01:47:12.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/312924?v=4","public":true,"private":false,"isOrgOwned":false},"currentUser":null,"refInfo":{"name":"master","listCacheKey":"v0:1625637462.898193","canEdit":false,"refType":"branch","currentOid":"d142b8753117761ecbac28929ed9762934cb0a02"},"tree":{"items":[{"name":"scattertext","path":"scattertext","contentType":"directory"},{"name":".gitattributes","path":".gitattributes","contentType":"file"},{"name":".travis.yml","path":".travis.yml","contentType":"file"},{"name":"ISSUE_TEMPLATE","path":"ISSUE_TEMPLATE","contentType":"file"},{"name":"LICENSE","path":"LICENSE","contentType":"file"},{"name":"MANIFEST.in","path":"MANIFEST.in","contentType":"file"},{"name":"PhraseMachineLicense.txt","path":"PhraseMachineLicense.txt","contentType":"file"},{"name":"README.md","path":"README.md","contentType":"file"},{"name":"demo.py","path":"demo.py","contentType":"file"},{"name":"demo_alt_tokenization.py","path":"demo_alt_tokenization.py","contentType":"file"},{"name":"demo_axis_crossbars_and_labels.py","path":"demo_axis_crossbars_and_labels.py","contentType":"file"},{"name":"demo_beta_posterior.py","path":"demo_beta_posterior.py","contentType":"file"},{"name":"demo_bi_normal_separation.py","path":"demo_bi_normal_separation.py","contentType":"file"},{"name":"demo_bm25.py","path":"demo_bm25.py","contentType":"file"},{"name":"demo_bow_pca.py","path":"demo_bow_pca.py","contentType":"file"},{"name":"demo_category_frequencies.py","path":"demo_category_frequencies.py","contentType":"file"},{"name":"demo_characteristic_chart.py","path":"demo_characteristic_chart.py","contentType":"file"},{"name":"demo_chinese.py","path":"demo_chinese.py","conte
ntType":"file"},{"name":"demo_cliffs_delta.py","path":"demo_cliffs_delta.py","contentType":"file"},{"name":"demo_cognitive_distortions.py","path":"demo_cognitive_distortions.py","contentType":"file"},{"name":"demo_cohens_d.py","path":"demo_cohens_d.py","contentType":"file"},{"name":"demo_compact.py","path":"demo_compact.py","contentType":"file"},{"name":"demo_compact_suppress_documents.py","path":"demo_compact_suppress_documents.py","contentType":"file"},{"name":"demo_correlation_pearsons.py","path":"demo_correlation_pearsons.py","contentType":"file"},{"name":"demo_craigs_zeta.py","path":"demo_craigs_zeta.py","contentType":"file"},{"name":"demo_cred_tfidf.py","path":"demo_cred_tfidf.py","contentType":"file"},{"name":"demo_custom_coordinates.py","path":"demo_custom_coordinates.py","contentType":"file"},{"name":"demo_custom_topic_model.py","path":"demo_custom_topic_model.py","contentType":"file"},{"name":"demo_deltajsd.py","path":"demo_deltajsd.py","contentType":"file"},{"name":"demo_dense_rank.py","path":"demo_dense_rank.py","contentType":"file"},{"name":"demo_dense_rank_difference.py","path":"demo_dense_rank_difference.py","contentType":"file"},{"name":"demo_dispersion.py","path":"demo_dispersion.py","contentType":"file"},{"name":"demo_dispersion_basic.py","path":"demo_dispersion_basic.py","contentType":"file"},{"name":"demo_dissemination.py","path":"demo_dissemination.py","contentType":"file"},{"name":"demo_embeddings_pca.py","path":"demo_embeddings_pca.py","contentType":"file"},{"name":"demo_emoji.py","path":"demo_emoji.py","contentType":"file"},{"name":"demo_empath.py","path":"demo_empath.py","contentType":"file"},{"name":"demo_eta_da.py","path":"demo_eta_da.py","contentType":"file"},{"name":"demo_expected_vs_actual.py","path":"demo_expected_vs_actual.py","contentType":"file"},{"name":"demo_feature_importance.py","path":"demo_feature_importance.py","contentType":"file"},{"name":"demo_flashtext.py","path":"demo_flashtext.py","contentType":"file"},{"name":"demo_foc
used_pair_plot_movies.py","path":"demo_focused_pair_plot_movies.py","contentType":"file"},{"name":"demo_foreign_characteristic_frequencies.py","path":"demo_foreign_characteristic_frequencies.py","contentType":"file"},{"name":"demo_four_square.py","path":"demo_four_square.py","contentType":"file"},{"name":"demo_g2.py","path":"demo_g2.py","contentType":"file"},{"name":"demo_general_inquirer.py","path":"demo_general_inquirer.py","contentType":"file"},{"name":"demo_general_inquirer_frequency_plot.py","path":"demo_general_inquirer_frequency_plot.py","contentType":"file"},{"name":"demo_gensim_similarity.py","path":"demo_gensim_similarity.py","contentType":"file"},{"name":"demo_global_scale_log.py","path":"demo_global_scale_log.py","contentType":"file"},{"name":"demo_gradient.py","path":"demo_gradient.py","contentType":"file"},{"name":"demo_hedges_g.py","path":"demo_hedges_g.py","contentType":"file"},{"name":"demo_ignore_categories.py","path":"demo_ignore_categories.py","contentType":"file"},{"name":"demo_include_all_contexts.py","path":"demo_include_all_contexts.py","contentType":"file"},{"name":"demo_insignificant_greyed_out.py","path":"demo_insignificant_greyed_out.py","contentType":"file"},{"name":"demo_japanese.py","path":"demo_japanese.py","contentType":"file"},{"name":"demo_label_coloring.py","path":"demo_label_coloring.py","contentType":"file"},{"name":"demo_lemmas.py","path":"demo_lemmas.py","contentType":"file"},{"name":"demo_log_odds_ratio_prior.py","path":"demo_log_odds_ratio_prior.py","contentType":"file"},{"name":"demo_log_relative_risk.py","path":"demo_log_relative_risk.py","contentType":"file"},{"name":"demo_log_scale.py","path":"demo_log_scale.py","contentType":"file"},{"name":"demo_lrc.py","path":"demo_lrc.py","contentType":"file"},{"name":"demo_lrc_movies.py","path":"demo_lrc_movies.py","contentType":"file"},{"name":"demo_mann_whitney.py","path":"demo_mann_whitney.py","contentType":"file"},{"name":"demo_matplotlib_export.py","path":"demo_matplotlib_expor
t.py","contentType":"file"},{"name":"demo_moral_foundations.py","path":"demo_moral_foundations.py","contentType":"file"},{"name":"demo_multi_category_pca.py","path":"demo_multi_category_pca.py","contentType":"file"},{"name":"demo_names.py","path":"demo_names.py","contentType":"file"},{"name":"demo_nmf_topic_model.py","path":"demo_nmf_topic_model.py","contentType":"file"},{"name":"demo_obama.py","path":"demo_obama.py","contentType":"file"},{"name":"demo_output_data.py","path":"demo_output_data.py","contentType":"file"},{"name":"demo_pair_plot_20_newsgroups.py","path":"demo_pair_plot_20_newsgroups.py","contentType":"file"},{"name":"demo_pair_plot_category_focused.py","path":"demo_pair_plot_category_focused.py","contentType":"file"},{"name":"demo_pair_plot_convention.py","path":"demo_pair_plot_convention.py","contentType":"file"},{"name":"demo_pair_plot_convention_empath.py","path":"demo_pair_plot_convention_empath.py","contentType":"file"},{"name":"demo_pair_plot_convention_geninq.py","path":"demo_pair_plot_convention_geninq.py","contentType":"file"},{"name":"demo_pair_plot_convention_pacmap.py","path":"demo_pair_plot_convention_pacmap.py","contentType":"file"},{"name":"demo_pair_plot_movies.py","path":"demo_pair_plot_movies.py","contentType":"file"},{"name":"demo_pair_plot_movies_doc2vec.py","path":"demo_pair_plot_movies_doc2vec.py","contentType":"file"},{"name":"demo_pair_plot_movies_empath.py","path":"demo_pair_plot_movies_empath.py","contentType":"file"},{"name":"demo_pair_plot_movies_mirror.py","path":"demo_pair_plot_movies_mirror.py","contentType":"file"},{"name":"demo_pair_plot_movies_mirror_simple.py","path":"demo_pair_plot_movies_mirror_simple.py","contentType":"file"},{"name":"demo_pair_plot_movies_pacmap.py","path":"demo_pair_plot_movies_pacmap.py","contentType":"file"},{"name":"demo_pair_plot_movies_pca.py","path":"demo_pair_plot_movies_pca.py","contentType":"file"},{"name":"demo_pair_plot_movies_phate.py","path":"demo_pair_plot_movies_phate.py","contentTy
pe":"file"},{"name":"demo_pair_plot_movies_umap.py","path":"demo_pair_plot_movies_umap.py","contentType":"file"},{"name":"demo_pca_documents.py","path":"demo_pca_documents.py","contentType":"file"},{"name":"demo_phate_documents.py","path":"demo_phate_documents.py","contentType":"file"},{"name":"demo_phrase_machine.py","path":"demo_phrase_machine.py","contentType":"file"},{"name":"demo_productivity.py","path":"demo_productivity.py","contentType":"file"},{"name":"demo_pytextrank.py","path":"demo_pytextrank.py","contentType":"file"},{"name":"demo_relative_entropy.py","path":"demo_relative_entropy.py","contentType":"file"},{"name":"demo_scaled_f_score.py","path":"demo_scaled_f_score.py","contentType":"file"},{"name":"demo_semiotic.py","path":"demo_semiotic.py","contentType":"file"},{"name":"demo_sentence_piece.py","path":"demo_sentence_piece.py","contentType":"file"},{"name":"demo_similarity.py","path":"demo_similarity.py","contentType":"file"},{"name":"demo_simple_maths.py","path":"demo_simple_maths.py","contentType":"file"},{"name":"demo_sklearn.py","path":"demo_sklearn.py","contentType":"file"},{"name":"demo_sparse.py","path":"demo_sparse.py","contentType":"file"},{"name":"demo_stylistic_features.py","path":"demo_stylistic_features.py","contentType":"file"},{"name":"demo_table.py","path":"demo_table.py","contentType":"file"},{"name":"demo_table_group.py","path":"demo_table_group.py","contentType":"file"},{"name":"demo_tdm_without_categories.py","path":"demo_tdm_without_categories.py","contentType":"file"},{"name":"demo_tfidf.py","path":"demo_tfidf.py","contentType":"file"},{"name":"demo_time_plot.py","path":"demo_time_plot.py","contentType":"file"},{"name":"demo_tokenizer_roberta.py","path":"demo_tokenizer_roberta.py","contentType":"file"},{"name":"demo_trigram_pmi.py","path":"demo_trigram_pmi.py","contentType":"file"},{"name":"demo_tsne_style.py","path":"demo_tsne_style.py","contentType":"file"},{"name":"demo_tsne_style_for_publication.py","path":"demo_tsne_style_fo
r_publication.py","contentType":"file"},{"name":"demo_two_axis.py","path":"demo_two_axis.py","contentType":"file"},{"name":"demo_umap_documents.py","path":"demo_umap_documents.py","contentType":"file"},{"name":"demo_umap_wordcloud.py","path":"demo_umap_wordcloud.py","contentType":"file"},{"name":"demo_unified_context.py","path":"demo_unified_context.py","contentType":"file"},{"name":"demo_vertical_lines.py","path":"demo_vertical_lines.py","contentType":"file"},{"name":"demo_with_apostrophes.py","path":"demo_with_apostrophes.py","contentType":"file"},{"name":"demo_without_spacy.py","path":"demo_without_spacy.py","contentType":"file"},{"name":"demo_word_list_topic_model.py","path":"demo_word_list_topic_model.py","contentType":"file"},{"name":"demo_z_scores.py","path":"demo_z_scores.py","contentType":"file"},{"name":"distribution.sh","path":"distribution.sh","contentType":"file"},{"name":"regendocs.sh","path":"regendocs.sh","contentType":"file"},{"name":"setup.py","path":"setup.py","contentType":"file"},{"name":"simple.py","path":"simple.py","contentType":"file"}],"templateDirectorySuggestionUrl":null,"readme":null,"totalCount":121,"showBranchInfobar":false},"fileTree":null,"fileTreeProcessingTime":null,"foldersToFetch":[],"treeExpanded":false,"symbolsExpanded":false,"isOverview":true,"overview":{"banners":{"shouldRecommendReadme":false,"isPersonalRepo":false,"showUseActionBanner":false,"actionSlug":null,"actionId":null,"showProtectBranchBanner":false,"publishBannersInfo":{"dismissActionNoticePath":"/settings/dismiss-notice/publish_action_from_repo","releasePath":"/JasonKessler/scattertext/releases/new?marketplace=true","showPublishActionBanner":false},"interactionLimitBanner":null,"showInvitationBanner":false,"inviterName":null,"actionsMigrationBannerInfo":{"releaseTags":[],"showImmutableActionsMigrationBanner":false,"initialMigrationStatus":null}},"codeButton":{"contactPath":"/contact","isEnterprise":false,"local":{"protocolInfo":{"httpAvailable":true,"sshAvailable":
null,"httpUrl":"https://github.com/JasonKessler/scattertext.git","showCloneWarning":null,"sshUrl":null,"sshCertificatesRequired":null,"sshCertificatesAvailable":null,"ghCliUrl":"gh repo clone JasonKessler/scattertext","defaultProtocol":"http","newSshKeyUrl":"/settings/ssh/new","setProtocolPath":"/users/set_protocol"},"platformInfo":{"cloneUrl":"https://desktop.github.com","showVisualStudioCloneButton":false,"visualStudioCloneUrl":"https://windows.github.com","showXcodeCloneButton":false,"xcodeCloneUrl":"xcode://clone?repo=https%3A%2F%2Fgithub.com%2FJasonKessler%2Fscattertext","zipballUrl":"/JasonKessler/scattertext/archive/refs/heads/master.zip"}},"newCodespacePath":"/codespaces/new?hide_repo_select=true\u0026repo=63827736"},"popovers":{"rename":null,"renamedParentRepo":null},"commitCount":"389","overviewFiles":[{"displayName":"README.md","repoName":"scattertext","refName":"master","path":"README.md","preferredFileType":"readme","tabName":"README","richText":"\u003carticle class=\"markdown-body entry-content container-lg\" itemprop=\"text\"\u003e\u003cp dir=\"auto\"\u003e\u003ca href=\"https://travis-ci.org/JasonKessler/scattertext\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/43d5dfb9a18061b821518d69fc053716441e4db83c58cd0fdb484f70b9c3cd36/68747470733a2f2f7472617669732d63692e6f72672f4a61736f6e4b6573736c65722f73636174746572746578742e7376673f6272616e63683d6d6173746572\" alt=\"Build Status\" data-canonical-src=\"https://travis-ci.org/JasonKessler/scattertext.svg?branch=master\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\n\u003ca href=\"/JasonKessler/scattertext/blob/master\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/0ed60cb8431e3b88329d69dbcc8dbc8ca9d527539aa1e9b6521536e010a83411/68747470733a2f2f696d672e736869656c64732e696f2f707970692f762f73636174746572746578742e737667\" alt=\"PyPI\" data-canonical-src=\"https://img.shields.io/pypi/v/scattertext.svg\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\n\u003ca 
href=\"https://gitter.im/scattertext/Lobby\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/d49b4fed41cfa2b8dd68ac724db9b926dec8e7f130d6b9fde8fb069ce5abd344/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f4749545445522d6a6f696e253230636861742d677265656e2e737667\" alt=\"Gitter Chat\" data-canonical-src=\"https://img.shields.io/badge/GITTER-join%20chat-green.svg\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\n\u003ca href=\"https://twitter.com/jasonkessler\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/9e10b788a9284c3bc5aee5831d13ba840f400b5eae820b5854e62b615e304bd4/68747470733a2f2f696d672e736869656c64732e696f2f747769747465722f666f6c6c6f772f657370616472696e652e7376673f7374796c653d736f6369616c266c6162656c3d466f6c6c6f77\" alt=\"Twitter Follow\" data-canonical-src=\"https://img.shields.io/twitter/follow/espadrine.svg?style=social\u0026amp;label=Follow\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch1 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eScattertext 0.2.2\u003c/h1\u003e\u003ca id=\"user-content-scattertext-022\" class=\"anchor\" aria-label=\"Permalink: Scattertext 0.2.2\" href=\"#scattertext-022\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eA tool for finding distinguishing terms in corpora and displaying them in an\ninteractive HTML scatter plot. Points corresponding to terms are selectively labeled\nso that they don't overlap with other labels or points.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCite as: Jason S. Kessler. Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ. ACL System\nDemonstrations. 2017.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eBelow is an example of using Scattertext to visualize terms used in 2012 American\npolitical conventions. The 2,000 most party-associated unigrams are displayed as\npoints in the scatter plot. Their x- and y- axes are the dense ranks of their usage by\nRepublican and Democratic speakers respectively.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\n\ndf = st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n)\n\ncorpus = st.CorpusFromParsedDocuments(\n df, category_col='party', parsed_col='parse'\n).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))\n\nhtml = st.produce_scattertext_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0, \n pmi_threshold_coefficient=0,\n width_in_pixels=1000, \n metadata=corpus.get_df()['speaker'],\n transform=st.Scalers.dense_rank,\n include_gradient=True,\n left_gradient_term='More Republican',\n middle_gradient_term='Metric: Dense Rank Difference',\n right_gradient_term='More Democratic',\n)\nopen('./demo_compact.html', 'w').write(html)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003eimport scattertext as st\n\ndf = st.SampleCorpora.ConventionData2012.get_data().assign(\n 
parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n)\n\ncorpus = st.CorpusFromParsedDocuments(\n df, category_col='party', parsed_col='parse'\n).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))\n\nhtml = st.produce_scattertext_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0, \n pmi_threshold_coefficient=0,\n width_in_pixels=1000, \n metadata=corpus.get_df()['speaker'],\n transform=st.Scalers.dense_rank,\n include_gradient=True,\n left_gradient_term='More Republican',\n middle_gradient_term='Metric: Dense Rank Difference',\n right_gradient_term='More Democratic',\n)\nopen('./demo_compact.html', 'w').write(html)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe HTML file written would look like the image below. Click on it for the actual interactive visualization.\n\u003ca href=\"https://jasonkessler.github.io/demo_compact.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_compact.png\" alt=\"demo_compact.html\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCitation\u003c/h2\u003e\u003ca id=\"user-content-citation\" class=\"anchor\" aria-label=\"Permalink: Citation\" href=\"#citation\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 
1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eJason S. Kessler. Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ. ACL System Demonstrations. 2017.\nLink to paper: \u003ca href=\"https://arxiv.org/abs/1703.00565\" rel=\"nofollow\"\u003earxiv.org/abs/1703.00565\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"@article{kessler2017scattertext,\n author = {Kessler, Jason S.},\n title = {Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ},\n booktitle = {Proceedings of ACL-2017 System Demonstrations},\n year = {2017},\n address = {Vancouver, Canada},\n publisher = {Association for Computational Linguistics},\n}\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e@article{kessler2017scattertext,\n author = {Kessler, Jason S.},\n title = {Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ},\n booktitle = {Proceedings of ACL-2017 System Demonstrations},\n year = {2017},\n address = {Vancouver, Canada},\n publisher = {Association for Computational Linguistics},\n}\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003cstrong\u003eTable of Contents\u003c/strong\u003e\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#installation\"\u003eInstallation\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#overview\"\u003eOverview\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#customizing-the-visualization-and-plotting-dispersion\"\u003eCustomizing the Visualization and Plotting 
Dispersion\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#tutorial\"\u003eTutorial\u003c/a\u003e\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"#help-i-dont-know-python-but-i-still-want-to-use-scattertext\"\u003eHelp! I don't know Python but I still want to use Scattertext\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations\"\u003eUsing Scattertext as a text analysis library: finding characteristic terms and their associations\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-term-associations\"\u003eVisualizing term associations\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-phrase-associations\"\u003eVisualizing phrase associations\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#adding-color-gradients-to-explain-scores\"\u003eAdding color gradients to explain scores\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-empath-topics-and-categories\"\u003eVisualizing Empath topics and categories\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-the-moral-foundations-2.0-dictionary\"\u003eVisualizing the Moral Foundations 2.0 Dictionary\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#ordering-terms-by-corpus-characteristicness\"\u003eOrdering Terms by Corpus Characteristicness\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#document-based-scatterplots\"\u003eDocument-Based Scatterplots\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-cohens-d-or-hedges-g-to-visualize-effect-size\"\u003eUsing Cohen's d or Hedge's g to visualize effect size\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-cliffs-delta-to-visualize-effect-size\"\u003eUsing Cliff's Delta to visualize effect size\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca 
href=\"#using-bi-normal-separation-bns-to-score-terms\"\u003eUsing Bi-Normal Separation (BNS) to score terms\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-correlations-to-explain-classifiers\"\u003eUsing correlations to explain classifiers\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-custom-background-word-frequencies\"\u003eUsing Custom Background Word Frequencies\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#plotting-word-productivity\"\u003ePlotting word productivity\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#understanding-scaled-f-score\"\u003eUnderstanding Scaled F-Score\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#alternative-term-scoring-methods\"\u003eAlternative term scoring methods\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#the-position-select-plot-process\"\u003eThe position-select-plot process\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#advanced-uses\"\u003eAdvanced Uses\u003c/a\u003e\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-differences-based-on-only-term-frequencies\"\u003eVisualizing differences based on only term frequencies\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-query-based-categorical-differences\"\u003eVisualizing query-based categorical differences\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-any-kind-of-term-score\"\u003eVisualizing any kind of term score\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#custom-term-positions\"\u003eCustom term positions\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#emoji-analysis\"\u003eEmoji analysis\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca 
href=\"#visualizing-sentencepiece-tokens\"\u003eVisualizing SentencePiece tokens\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-scikit-learn-text-classification-weights\"\u003eVisualizing scikit-learn text classification weights\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#creating-lexicalized-semiotic-squares\"\u003eCreating lexicalized semiotic squares\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#visualizing-topic-models\"\u003eVisualizing topic models\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#creating-T-SNE-style-word-embedding-projection-plots\"\u003eCreating T-SNE-style word embedding projection plots\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-svd-to-visualize-any-kind-of-word-embeddings\"\u003eUsing SVD to visualize any kind of word embeddings\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#exporting-plot-to-matplotlib\"\u003eExporting plot to matplotlib\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"#using-the-same-scale-for-both-axes\"\u003eUsing the same scale for both axes\u003c/a\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#examples\"\u003eExamples\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#a-note-on-chart-layout\"\u003eA note on chart layout\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#whats-new\"\u003eWhat's new\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003cli\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"#sources\"\u003eSources\u003c/a\u003e\u003c/p\u003e\n\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eInstallation\u003c/h2\u003e\u003ca id=\"user-content-installation\" class=\"anchor\" aria-label=\"Permalink: Installation\" 
href=\"#installation\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eInstall Python 3.11 or higher and run:\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ccode\u003e$ pip install scattertext\u003c/code\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIf you cannot (or don't want to) install spaCy, substitute \u003ccode\u003enlp = spacy.load('en')\u003c/code\u003e lines with\n\u003ccode\u003enlp = scattertext.WhitespaceNLP.whitespace_nlp\u003c/code\u003e. Note, this is not compatible\nwith \u003ccode\u003eword_similarity_explorer\u003c/code\u003e, and the tokenization and sentence boundary detection\ncapabilities will be low-performance regular expressions. 
See \u003ccode\u003edemo_without_spacy.py\u003c/code\u003e\nfor an example.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIt is recommended you install \u003ccode\u003ejieba\u003c/code\u003e, \u003ccode\u003espacy\u003c/code\u003e, \u003ccode\u003eempath\u003c/code\u003e, \u003ccode\u003eastropy\u003c/code\u003e, \u003ccode\u003eflashtext\u003c/code\u003e, \u003ccode\u003egensim\u003c/code\u003e and \u003ccode\u003eumap-learn\u003c/code\u003e in order to\ntake full advantage of Scattertext.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eScattertext should mostly work with Python 2.7, but it may not.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe HTML outputs look best in Chrome and Safari.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eStyle Guide\u003c/h2\u003e\u003ca id=\"user-content-style-guide\" class=\"anchor\" aria-label=\"Permalink: Style Guide\" href=\"#style-guide\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe name of this project is Scattertext. \"Scattertext\" is written as a single word\nand should be capitalized. 
When used in Python, the package \u003ccode\u003escattertext\u003c/code\u003e should be defined\nto the name \u003ccode\u003est\u003c/code\u003e, i.e., \u003ccode\u003eimport scattertext as st\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eOverview\u003c/h2\u003e\u003ca id=\"user-content-overview\" class=\"anchor\" aria-label=\"Permalink: Overview\" href=\"#overview\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThis is a tool that's intended for visualizing what words and phrases\nare more characteristic of a category than others.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eConsider the example at the top of the page.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eLooking at this may seem overwhelming. In fact, it's a relatively simple visualization of word use\nduring the 2012 political conventions. Each dot corresponds to a word or phrase mentioned by Republicans or Democrats\nduring their conventions. The closer a dot is to the top of the plot, the more frequently it was used by\nDemocrats. The further right a dot, the more that word or phrase was used by Republicans. 
Words frequently\nused by both parties, like \"of\" and \"the\" and even \"Mitt\" tend to occur in the upper-right-hand corner. Although very\nlow\nfrequency words have been hidden to preserve computing resources, a word that neither party used, like \"giraffe\"\nwould be in the bottom-left-hand corner.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe interesting things happen close to the upper-left and lower-right corners. In the upper-left corner,\nwords like \"auto\" (as in auto bailout) and \"millionaires\" are frequently used by Democrats but infrequently or never\nused\nby Republicans. Likewise, terms frequently used by Republicans and infrequently by Democrats occupy the\nbottom-right corner. These include \"big government\" and \"olympics\", referring to the Salt Lake City Olympics in which\nGov. Romney was involved.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTerms are colored by their association. Those that are more associated with Democrats are blue, and those\nmore associated with Republicans red.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTerms that are most characteristic of both sets of documents are displayed\non the far-right of the visualization.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe inspiration for this visualization came from Dataclysm (Rudder, 2014).\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eScattertext is designed to help you build these graphs and efficiently label points on them.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe documentation (including this readme) is a work in\nprogress. 
Please see the tutorial below as well as\nthe \u003ca href=\"https://github.com/JasonKessler/Scattertext-PyData\"\u003ePyData 2017 Tutorial\u003c/a\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003ePoking around the code and tests should give you a good idea of how things work.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe library covers some novel and effective term-importance formulas, including \u003cstrong\u003eScaled F-Score\u003c/strong\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCustomizing the Visualization and Plotting Dispersion\u003c/h2\u003e\u003ca id=\"user-content-customizing-the-visualization-and-plotting-dispersion\" class=\"anchor\" aria-label=\"Permalink: Customizing the Visualization and Plotting Dispersion\" href=\"#customizing-the-visualization-and-plotting-dispersion\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNew in Scattertext 0.1.0, one can use a dataframe for term/metadata positions and other term-specific data. 
We\ncan also use it to determine term-specific information which is shown after a term is clicked.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that it is possible to disable the use of document categories in Scattertext, as we shall see in this example.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThis example covers plotting term dispersion against word frequency and identifying the terms which are most and least\ndispersed given their frequencies. Using the Rosengren's S dispersion measure (Gries 2021), terms tend to increase in\ntheir\ndispersion scores as they get more frequent. We'll see how we can both plot this effect and factor out the effect\nof frequency.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThis, along with a number of other dispersion metrics presented in Gries (2021), are available and documented\nin the \u003ccode\u003eDispersion\u003c/code\u003e class, which we'll use later in the section.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eLet's start by creating a Convention corpus, but we'll use the \u003ccode\u003eCorpusWithoutCategoriesFromParsedDocuments\u003c/code\u003e factory\nto ensure that no categories are included in the corpus. 
If we try to find document categories, we'll see that\nall documents have the category '_'.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\n\ndf = st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences))\ncorpus = st.CorpusWithoutCategoriesFromParsedDocuments(\n df, parsed_col='parse'\n).build().get_unigram_corpus().remove_infrequent_words(minimum_term_count=6)\n\ncorpus.get_categories()\n# Returns ['_']\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003eparse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e))\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eCorpusWithoutCategoriesFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eremove_infrequent_words\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eminimum_term_count\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e6\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_categories\u003c/span\u003e()\n\u003cspan class=\"pl-c\"\u003e# Returns ['_']\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we'll create a dataframe for all terms we'll plot. We'll just start by creating a dataframe where we capture\nthe frequency of each term and various dispersion metrics. 
These will be shown after a term is activated in the plot.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"dispersion = st.Dispersion(corpus)\n\ndispersion_df = dispersion.get_df()\ndispersion_df.head(3)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003edispersion\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eDispersion\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003edispersion_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edispersion\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003edispersion_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ehead\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e3\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWhich returns\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\" Frequency Range SD VC Juilland's D Rosengren's S DP DP norm KL-divergence Dissemination\nthank 363 134 3.108113 1.618274 0.707416 0.694898 0.391548 0.391560 0.748808 0.972954\nyou 1630 177 12.383708 1.435902 0.888596 0.898805 0.233627 0.233635 0.263337 0.963905\nso 549 155 3.523380 1.212967 0.774299 0.822244 0.283151 0.283160 0.411750 0.986423```\n\nThese are discussed in detail in [Gries 2021](http://www.stgries.info/research/ToApp_STG_Dispersion_PHCL.pdf). \nDissementation is presented in Altmann et al. (2011).\n\nWe'll use Rosengren's S to find the dispersion of each term. It's which a metric designed for corpus parts\n(convention speeches in our case) of varying length. 
Here, n is the number of documents in the corpus, s_i is the\npercentage of tokens in the corpus found in document i, v_i is the term's count in document i, and f is the total number\nof tokens of the term's type in the corpus.\n\nRosengren's\nS: [![Rosengren's S](https://render.githubusercontent.com/render/math?math=\\frac{(\\sum_{i=1}^{n}\\sqrt{s_i%20\\cdot%20v_i})^2}{f})](https://render.githubusercontent.com/render/math?math=\\frac{(\\sum_{i=1}^{n}\\sqrt{s_i%20\\cdot%20v_i})^2}{f})\n\nIn order to start plotting, we'll need to add coordinates for each term to the data frame.\n\nTo use the `dataframe_scattertext` function, you need, at a minimum, a dataframe with 'X' and 'Y' columns.\n\nThe `Xpos` and `Ypos` columns indicate the positions of the original `X` and `Y` values on the scatterplot, and\nneed to be between 0 and 1. Functions in `st.Scalers` perform this scaling. Absent `Xpos` or `Ypos`,\n`st.Scalers.scale` would be used.\n\nHere is a sample of the available scalers:\n\n* `st.Scalers.scale(vec)` Rescales the vector to where the minimum value is 0 and the maximum is 1.\n* `st.Scalers.log_scale(vec)` Rescales the log of the vector\n* `st.Scalers.dense_rank(vec)` Rescales the dense rank of the vector\n* `st.Scalers.scale_center_zero_abs(vec)` Rescales a vector with both positive and negative values such that the 0 value\n in the original vector is plotted at 0.5, negative values are projected from [-max(abs(vec)), 0] to [0, 0.5] and\n positive values projected from [0, max(abs(vec))] to [0.5, 1].\n\n```python\ndispersion_df = dispersion_df.assign(\n X=lambda df: df.Frequency,\n Xpos=lambda df: st.Scalers.log_scale(df.X),\n Y=lambda df: df[\u0026quot;Rosengren's S\u0026quot;],\n Ypos=lambda df: st.Scalers.scale(df.Y),\n)\n```\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e Frequency Range SD VC Juilland's D Rosengren's S DP DP norm KL-divergence Dissemination\nthank 363 134 3.108113 1.618274 0.707416 0.694898 0.391548 0.391560 0.748808 0.972954\nyou 1630 177 
12.383708 1.435902 0.888596 0.898805 0.233627 0.233635 0.263337 0.963905\nso 549 155 3.523380 1.212967 0.774299 0.822244 0.283151 0.283160 0.411750 0.986423\n```\n\nThese are discussed in detail in [Gries 2021](http://www.stgries.info/research/ToApp_STG_Dispersion_PHCL.pdf). \nDissemination is presented in Altmann et al. (2011).\n\nWe'll use Rosengren's S to find the dispersion of each term. It's a metric designed for corpus parts\n(convention speeches in our case) of varying length. Here, n is the number of documents in the corpus, s_i is the\npercentage of tokens in the corpus found in document i, v_i is the term's count in document i, and f is the total number\nof tokens of the term's type in the corpus.\n\nRosengren's\nS: [![Rosengren's S](https://render.githubusercontent.com/render/math?math=\\frac{(\\sum_{i=1}^{n}\\sqrt{s_i%20\\cdot%20v_i})^2}{f})](https://render.githubusercontent.com/render/math?math=\\frac{(\\sum_{i=1}^{n}\\sqrt{s_i%20\\cdot%20v_i})^2}{f})\n\nIn order to start plotting, we'll need to add coordinates for each term to the data frame.\n\nTo use the `dataframe_scattertext` function, you need, at a minimum, a dataframe with 'X' and 'Y' columns.\n\nThe `Xpos` and `Ypos` columns indicate the positions of the original `X` and `Y` values on the scatterplot, and\nneed to be between 0 and 1. Functions in `st.Scalers` perform this scaling. 
Absent `Xpos` or `Ypos`,\n`st.Scalers.scale` would be used.\n\nHere is a sample of the available scalers:\n\n* `st.Scalers.scale(vec)` Rescales the vector to where the minimum value is 0 and the maximum is 1.\n* `st.Scalers.log_scale(vec)` Rescales the log of the vector\n* `st.Scalers.dense_rank(vec)` Rescales the dense rank of the vector\n* `st.Scalers.scale_center_zero_abs(vec)` Rescales a vector with both positive and negative values such that the 0 value\n in the original vector is plotted at 0.5, negative values are projected from [-max(abs(vec)), 0] to [0, 0.5] and\n positive values projected from [0, max(abs(vec))] to [0.5, 1].\n\n```python\ndispersion_df = dispersion_df.assign(\n X=lambda df: df.Frequency,\n Xpos=lambda df: st.Scalers.log_scale(df.X),\n Y=lambda df: df[\"Rosengren's S\"],\n Ypos=lambda df: st.Scalers.scale(df.Y),\n)\n```\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNote that the \u003ccode\u003eYpos\u003c/code\u003e column here is not necessary since \u003ccode\u003eY\u003c/code\u003e would automatically be scaled.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFinally, since we are not distinguishing between categories, we can set \u003ccode\u003eignore_categories=True\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can now plot this graph using the \u003ccode\u003edataframe_scattertext\u003c/code\u003e function:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.dataframe_scattertext(\n corpus,\n plot_df=dispersion_df,\n metadata=corpus.get_df()['speaker'] + ' (' + corpus.get_df()['party'].str.upper() + ')',\n ignore_categories=True,\n x_label='Log Frequency',\n y_label=\u0026quot;Rosengren's S\u0026quot;,\n y_axis_labels=['Less Dispersion', 'Medium', 'More Dispersion'],\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edataframe_scattertext\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003edispersion_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e' ('\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003estr\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eupper\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e')'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eignore_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Log Frequency'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"Rosengren's S\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_axis_labels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Less Dispersion'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Medium'\u003c/span\u003e, \u003cspan 
class=\"pl-s\"\u003e'More Dispersion'\u003c/span\u003e],\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWhich yields (click for an interactive version):\n\u003ca href=\"https://jasonkessler.github.io/dispersion-basic.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/e2887ad2d16e5d3719ab4e6f01714fd7022005564847c9a929185b7549de7b79/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64697370657273696f6e2d62617369632e706e67\" alt=\"dispersion-basic.html\" data-canonical-src=\"https://jasonkessler.github.io/dispersion-basic.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that we can see various dispersion statistics under a term's name, in addition to the standard usage statistics. To\ncustomize the statistics which are displayed, set the \u003ccode\u003eterm_description_column=[...]\u003c/code\u003e parameter with a list of column\nnames to be displayed.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eOne issue in this dispersion chart, which tends to be common to dispersion metrics in general, is that dispersion\nand frequency tend to have a high correlation, but with a complex, non-linear curve. 
Depending on the metric,\nthis correlation curve could be power, linear, sigmoidal, or, typically, something else.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn order to factor out this correlation, we can predict the dispersion from frequency using a non-parametric regressor,\nand see which terms have the highest and lowest residuals with respect to their expected dispersions based on their\nfrequencies.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn this case, we'll use a KNN regressor with 10 neighbors to predict Rosengren's S from term frequencies\n(\u003ccode\u003edispersion_df.Y\u003c/code\u003e and \u003ccode\u003e.X\u003c/code\u003e respectively), and compute the residual.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe'll use the residual to color points, with a neutral color for residuals around 0 and other colors for positive and\nnegative values. We'll add a column in the data frame for point colors, and call it ColorScore. It is populated\nwith values between 0 and 1, with 0.5 as a neutral color on the \u003ccode\u003ed3 interpolateWarm\u003c/code\u003e color scale. 
We use\n\u003ccode\u003est.Scalers.scale_center_zero_abs\u003c/code\u003e, discussed above, to make this transformation.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"from sklearn.neighbors import KNeighborsRegressor\n\ndispersion_df = dispersion_df.assign(\n Expected=lambda df: KNeighborsRegressor(n_neighbors=10).fit(\n df.X.values.reshape(-1, 1), df.Y\n ).predict(df.X.values.reshape(-1, 1)),\n Residual=lambda df: df.Y - df.Expected,\n ColorScore=lambda df: st.Scalers.scale_center_zero_abs(df.Residual)\n) \"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003eneighbors\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eKNeighborsRegressor\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003edispersion_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edispersion_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-v\"\u003eExpected\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-en\"\u003eKNeighborsRegressor\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003en_neighbors\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003efit\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ereshape\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e1\u003c/span\u003e, \u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e), \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e\n ).\u003cspan class=\"pl-c1\"\u003epredict\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ereshape\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e, \u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e)),\n \u003cspan class=\"pl-v\"\u003eResidual\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eExpected\u003c/span\u003e,\n \u003cspan class=\"pl-v\"\u003eColorScore\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003escale_center_zero_abs\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eResidual\u003c/span\u003e)\n) \u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNow we are ready to plot our colored dispersion chart. 
We assign the ColorScore column name to the \u003ccode\u003ecolor_score_column\u003c/code\u003e\nparameter in \u003ccode\u003edataframe_scattertext\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdditionally, We'd like to populate the two term lists on the\nleft with terms that have high and low residual values, indicating terms which have the most dispersion relative to\ntheir frequency-expected level and the lowest. We can do this by the \u003ccode\u003eleft_list_column\u003c/code\u003e parameter. We can specify\nthe upper and lower term list names using the \u003ccode\u003eheader_names\u003c/code\u003e parameter. Finally, we can spiff-up the plot by\nadding an appealing background color.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.dataframe_scattertext(\n corpus,\n plot_df=dispersion_df,\n metadata=corpus.get_df()['speaker'] + ' (' + corpus.get_df()['party'].str.upper() + ')',\n ignore_categories=True,\n x_label='Log Frequency',\n y_label=\u0026quot;Rosengren's S\u0026quot;,\n y_axis_labels=['Less Dispersion', 'Medium', 'More Dispersion'],\n color_score_column='ColorScore',\n header_names={'upper': 'Lower than Expected', 'lower': 'More than Expected'},\n left_list_column='Residual',\n background_color='#e5e5e3'\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edataframe_scattertext\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003edispersion_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e' ('\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003estr\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eupper\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e')'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eignore_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Log Frequency'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"Rosengren's S\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_axis_labels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Less Dispersion'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Medium'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'More Dispersion'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003ecolor_score_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'ColorScore'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eheader_names\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e{\u003cspan class=\"pl-s\"\u003e'upper'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Lower than Expected'\u003c/span\u003e, \u003cspan 
class=\"pl-s\"\u003e'lower'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'More than Expected'\u003c/span\u003e},\n \u003cspan class=\"pl-s1\"\u003eleft_list_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Residual'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ebackground_color\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'#e5e5e3'\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWhich yields (click for an interactive version):\n\u003ca href=\"https://jasonkessler.github.io/dispersion-residual.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/1a4dacac9740785f890c344aa530d156789cf067a4f3c3d359458e42d03e2e1a/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64697370657273696f6e2d726573696475616c2e706e67\" alt=\"dispersion-residual.html\" data-canonical-src=\"https://jasonkessler.github.io/dispersion-residual.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eTutorial\u003c/h2\u003e\u003ca id=\"user-content-tutorial\" class=\"anchor\" aria-label=\"Permalink: Tutorial\" href=\"#tutorial\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eHelp! I don't know Python but I still want to use Scattertext.\u003c/h3\u003e\u003ca id=\"user-content-help-i-dont-know-python-but-i-still-want-to-use-scattertext\" class=\"anchor\" aria-label=\"Permalink: Help! I don't know Python but I still want to use Scattertext.\" href=\"#help-i-dont-know-python-but-i-still-want-to-use-scattertext\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWhile you should learn Python to fully use Scattertext, I've put some of the basic\nfunctionality in a commandline tool. The tool is installed when you follow the procedure laid out\nabove.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eRun \u003ccode\u003e$ scattertext --help\u003c/code\u003e from the commandline to see the full usage information. Here's a quick example of\nhow to use vanilla Scattertext on a CSV file. The file needs to have at least two columns,\none containing the text to be analyzed, and another containing the category. 
In the example CSV below,\nthe columns are text and party, respectively.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe example below processes the CSV file, and writes the resulting HTML visualization into cli_demo.html.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that the parameter \u003ccode\u003e--minimum_term_frequency=8\u003c/code\u003e omits terms that occur fewer than 8\ntimes, and \u003ccode\u003e--regex_parser\u003c/code\u003e indicates a simple regular expression parser should\nbe used in place of spaCy. The flag \u003ccode\u003e--one_use_per_doc\u003c/code\u003e indicates that term frequency\nshould be calculated by counting no more than one occurrence of a term in a document.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIf you'd like to parse non-English text, you can use the \u003ccode\u003e--spacy_language_model\u003c/code\u003e argument to configure which\nspaCy language model the tool will use. The default is 'en' and you can see the others available at\n\u003ca href=\"https://spacy.io/docs/api/language-models\" rel=\"nofollow\"\u003ehttps://spacy.io/docs/api/language-models\u003c/a\u003e.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-shell notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"$ curl -s https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv | head -2\nparty,speaker,text\ndemocrat,BARACK OBAMA,\u0026quot;Thank you. Thank you. Thank you. Thank you so much.Thank you.Thank you so much. Thank you. Thank you very much, everybody. 
Thank you.\n$\n$ scattertext --datafile=https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv \\\n\u0026gt; --text_column=text --category_column=party --metadata_column=speaker --positive_category=democrat \\\n\u0026gt; --category_display_name=Democratic --not_category_display_name=Republican --minimum_term_frequency=8 \\\n\u0026gt; --one_use_per_doc --regex_parser --outputfile=cli_demo.html\"\u003e\u003cpre\u003e$ curl -s https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv \u003cspan class=\"pl-k\"\u003e|\u003c/span\u003e head -2\nparty,speaker,text\ndemocrat,BARACK OBAMA,\u003cspan class=\"pl-s\"\u003e\u003cspan class=\"pl-pds\"\u003e\"\u003c/span\u003eThank you. Thank you. Thank you. Thank you so much.Thank you.Thank you so much. Thank you. Thank you very much, everybody. Thank you.\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e$\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e$ scattertext --datafile=https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv \u003cspan class=\"pl-cce\"\u003e\\\u003c/span\u003e\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e\u0026gt; --text_column=text --category_column=party --metadata_column=speaker --positive_category=democrat \u003cspan class=\"pl-cce\"\u003e\\\u003c/span\u003e\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e\u0026gt; --category_display_name=Democratic --not_category_display_name=Republican --minimum_term_frequency=8 \u003cspan class=\"pl-cce\"\u003e\\\u003c/span\u003e\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e\u0026gt; --one_use_per_doc --regex_parser --outputfile=cli_demo.html\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Scattertext as a text analysis library: finding characteristic terms and their associations\u003c/h3\u003e\u003ca 
id=\"user-content-using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations\" class=\"anchor\" aria-label=\"Permalink: Using Scattertext as a text analysis library: finding characteristic terms and their associations\" href=\"#using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe following code creates a stand-alone HTML file that analyzes words\nused by Democrats and Republicans in the 2012 party conventions, and outputs some notable\nterm associations.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, import Scattertext and spaCy.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; import scattertext as st\n\u0026gt;\u0026gt;\u0026gt; import spacy\n\u0026gt;\u0026gt;\u0026gt; from pprint import pprint\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; import scattertext as st\n\u0026gt;\u0026gt;\u0026gt; import spacy\n\u0026gt;\u0026gt;\u0026gt; from pprint import pprint\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp 
dir=\"auto\"\u003eNext, assemble the data you want to analyze into a Pandas data frame. It should have\nat least two columns: the text you'd like to analyze, and the category you'd like to\nstudy. Here, the \u003ccode\u003etext\u003c/code\u003e column contains convention speeches while the \u003ccode\u003eparty\u003c/code\u003e column\ncontains the party of the speaker. We'll eventually use the \u003ccode\u003espeaker\u003c/code\u003e column\nto label snippets in the visualization.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() \n\u0026gt;\u0026gt;\u0026gt; convention_df.iloc[0]\nparty democrat\nspeaker BARACK OBAMA\ntext Thank you. Thank you. Thank you. Thank you so ...\nName: 0, dtype: object\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() \n\u0026gt;\u0026gt;\u0026gt; convention_df.iloc[0]\nparty democrat\nspeaker BARACK OBAMA\ntext Thank you. Thank you. Thank you. Thank you so ...\nName: 0, dtype: object\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTurn the data frame into a Scattertext Corpus to begin analyzing it. To look for differences\nin parties, set the \u003ccode\u003ecategory_col\u003c/code\u003e parameter to \u003ccode\u003e'party'\u003c/code\u003e, and use the speeches,\npresent in the \u003ccode\u003etext\u003c/code\u003e column, as the texts to analyze by setting the \u003ccode\u003etext_col\u003c/code\u003e\nparameter. 
Finally, pass a spaCy model to the \u003ccode\u003enlp\u003c/code\u003e argument and call \u003ccode\u003ebuild()\u003c/code\u003e to construct the corpus.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"# Turn it into a Scattertext Corpus \n\u0026gt;\u0026gt;\u0026gt; nlp = spacy.load('en')\n\u0026gt;\u0026gt;\u0026gt; corpus = st.CorpusFromPandas(convention_df, \n... category_col='party', \n... text_col='text',\n... nlp=nlp).build()\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e# Turn it into a Scattertext Corpus \n\u0026gt;\u0026gt;\u0026gt; nlp = spacy.load('en')\n\u0026gt;\u0026gt;\u0026gt; corpus = st.CorpusFromPandas(convention_df, \n... category_col='party', \n... text_col='text',\n... nlp=nlp).build()\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eLet's see characteristic terms in the corpus, and terms that are most associated with Democrats and\nRepublicans. 
See slides\n\u003ca href=\"http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/52\" rel=\"nofollow\"\u003e52\u003c/a\u003e\nto \u003ca href=\"http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/59\" rel=\"nofollow\"\u003e59\u003c/a\u003e of\nthe \u003ca href=\"http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/\" rel=\"nofollow\"\u003eTurning Unstructured Content into Kernels of Ideas\u003c/a\u003e\ntalk for more details on these approaches.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eHere are the terms that differentiate the corpus from a general English corpus.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; print(list(corpus.get_scaled_f_scores_vs_background().index[:10]))\n['obama',\n 'romney',\n 'barack',\n 'mitt',\n 'obamacare',\n 'biden',\n 'romneys',\n 'hardworking',\n 'bailouts',\n 'autoworkers']\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; print(list(corpus.get_scaled_f_scores_vs_background().index[:10]))\n['obama',\n 'romney',\n 'barack',\n 'mitt',\n 'obamacare',\n 'biden',\n 'romneys',\n 'hardworking',\n 'bailouts',\n 'autoworkers']\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eHere are the terms that are most associated with Democrats:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; term_freq_df = corpus.get_term_freq_df()\n\u0026gt;\u0026gt;\u0026gt; term_freq_df['Democratic Score'] = corpus.get_scaled_f_scores('democrat')\n\u0026gt;\u0026gt;\u0026gt; pprint(list(term_freq_df.sort_values(by='Democratic Score', ascending=False).index[:10]))\n['auto',\n 'america forward',\n 'auto industry',\n 'insurance 
companies',\n 'pell',\n 'last week',\n 'pell grants',\n \u0026quot;women 's\u0026quot;,\n 'platform',\n 'millionaires']\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; term_freq_df = corpus.get_term_freq_df()\n\u0026gt;\u0026gt;\u0026gt; term_freq_df['Democratic Score'] = corpus.get_scaled_f_scores('democrat')\n\u0026gt;\u0026gt;\u0026gt; pprint(list(term_freq_df.sort_values(by='Democratic Score', ascending=False).index[:10]))\n['auto',\n 'america forward',\n 'auto industry',\n 'insurance companies',\n 'pell',\n 'last week',\n 'pell grants',\n \"women 's\",\n 'platform',\n 'millionaires']\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAnd Republicans:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; term_freq_df['Republican Score'] = corpus.get_scaled_f_scores('republican')\n\u0026gt;\u0026gt;\u0026gt; pprint(list(term_freq_df.sort_values(by='Republican Score', ascending=False).index[:10]))\n['big government',\n \u0026quot;n't build\u0026quot;,\n 'mitt was',\n 'the constitution',\n 'he wanted',\n 'hands that',\n 'of mitt',\n '16 trillion',\n 'turned around',\n 'in florida']\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; term_freq_df['Republican Score'] = corpus.get_scaled_f_scores('republican')\n\u0026gt;\u0026gt;\u0026gt; pprint(list(term_freq_df.sort_values(by='Republican Score', ascending=False).index[:10]))\n['big government',\n \"n't build\",\n 'mitt was',\n 'the constitution',\n 'he wanted',\n 'hands that',\n 'of mitt',\n '16 trillion',\n 'turned around',\n 'in florida']\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing term 
associations\u003c/h3\u003e\u003ca id=\"user-content-visualizing-term-associations\" class=\"anchor\" aria-label=\"Permalink: Visualizing term associations\" href=\"#visualizing-term-associations\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNow, let's write the scatter plot a stand-alone HTML file. We'll make the y-axis category \"democrat\", and name\nthe category \"Democrat\" with a capital \"D\" for presentation\npurposes. We'll name the other category \"Republican\" with a capital \"R\". All documents in the corpus without\nthe category \"democrat\" will be considered Republican. We set the width of the visualization in pixels, and label\neach excerpt with the speaker using the \u003ccode\u003emetadata\u003c/code\u003e parameter. Finally, we write the visualization to an HTML file.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; html = st.produce_scattertext_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... width_in_pixels=1000,\n... 
metadata=convention_df['speaker'])\n\u0026gt;\u0026gt;\u0026gt; open(\u0026quot;Convention-Visualization.html\u0026quot;, 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; html = st.produce_scattertext_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'])\n\u0026gt;\u0026gt;\u0026gt; open(\"Convention-Visualization.html\", 'wb').write(html.encode('utf-8'))\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBelow is what the webpage looks like. Click it and wait a few minutes for the interactive version.\n\u003ca href=\"https://jasonkessler.github.io/Conventions-Visualization.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/7db752f8d90001ebcd8f684a89333f2651ff3c3cd891961e0afd4d9abb955fd1/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f32303132636f6e76656e74696f6e73302e302e322e322e706e67\" alt=\"Conventions-Visualization.html\" data-canonical-src=\"https://jasonkessler.github.io/2012conventions0.0.2.2.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing Phrase associations\u003c/h3\u003e\u003ca id=\"user-content-visualizing-phrase-associations\" class=\"anchor\" aria-label=\"Permalink: Visualizing Phrase associations\" href=\"#visualizing-phrase-associations\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 
1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eScattertext can also be used to visualize the category association of a variety of different phrase types. The word\n\"phrase\" denotes any single or multi-word collocation.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch4 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing PyTextRank\u003c/h4\u003e\u003ca id=\"user-content-using-pytextrank\" class=\"anchor\" aria-label=\"Permalink: Using PyTextRank\" href=\"#using-pytextrank\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://github.com/DerwenAI/pytextrank\"\u003ePyTextRank\u003c/a\u003e, created by Paco Nathan, is an implementation of\na modified version of the TextRank algorithm (Mihalcea and Tarau 2004). It involves graph centrality\nalgorithm to extract a scored list of the most prominent phrases in a document. 
Here,\nnamed entities recognized by spaCy. As of spaCy version 2.2, these are from an NER system trained on\n\u003ca href=\"https://catalog.ldc.upenn.edu/LDC2013T19\" rel=\"nofollow\"\u003eOntonotes 5\u003c/a\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003ePlease install pytextrank \u003ccode\u003e$ pip3 install pytextrank\u003c/code\u003e before continuing with this tutorial.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTo use, build a corpus as normal, but make sure you use spaCy to parse each document as opposed a built-in\n\u003ccode\u003ewhitespace_nlp\u003c/code\u003e-type tokenizer. Note that adding PyTextRank to the spaCy pipeline is not needed, as it\nwill be run separately by the \u003ccode\u003ePyTextRankPhrases\u003c/code\u003e object. We'll reduce the number of phrases displayed in the\nchart to 2000 using the \u003ccode\u003eAssociationCompactor\u003c/code\u003e. The phrases generated will be treated like non-textual features\nsince their document scores will not correspond to word counts.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"import pytextrank, spacy\nimport scattertext as st\n\nnlp = spacy.load('en')\nnlp.add_pipe(\u0026quot;textrank\u0026quot;, last=True)\n\nconvention_df = st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(nlp),\n party=lambda df: df.party.apply({'democrat': 'Democratic', 'republican': 'Republican'}.get)\n)\ncorpus = st.CorpusFromParsedDocuments(\n convention_df,\n category_col='party',\n parsed_col='parse',\n feats_from_spacy_doc=st.PyTextRankPhrases()\n).build(\n).compact(\n AssociationCompactor(2000, use_non_text_features=True)\n)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003eimport pytextrank, spacy\nimport scattertext as st\n\nnlp = spacy.load('en')\nnlp.add_pipe(\"textrank\", last=True)\n\nconvention_df = 
st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(nlp),\n party=lambda df: df.party.apply({'democrat': 'Democratic', 'republican': 'Republican'}.get)\n)\ncorpus = st.CorpusFromParsedDocuments(\n convention_df,\n category_col='party',\n parsed_col='parse',\n feats_from_spacy_doc=st.PyTextRankPhrases()\n).build(\n).compact(\n AssociationCompactor(2000, use_non_text_features=True)\n)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNote that the terms present in the corpus are named entities, and, as opposed to frequency counts, their scores\nare the eigencentrality scores assigned to them by the TextRank algorithm. Running \u003ccode\u003ecorpus.get_metadata_freq_df('')\u003c/code\u003e\nwill return, for each category, the sums of terms' TextRank scores. The dense ranks of these scores will be used to\nconstruct the scatter plot.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"term_category_scores = corpus.get_metadata_freq_df('')\nprint(term_category_scores)\n'''\n Democratic Republican\nterm\nour future 1.113434 0.699103\nyour country 0.314057 0.000000\ntheir home 0.385925 0.000000\nour government 0.185483 0.462122\nour workers 0.199704 0.210989\nher family 0.540887 0.405552\nour time 0.510930 0.410058\n...\n'''\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003eterm_category_scores = corpus.get_metadata_freq_df('')\nprint(term_category_scores)\n'''\n Democratic Republican\nterm\nour future 1.113434 0.699103\nyour country 0.314057 0.000000\ntheir home 0.385925 0.000000\nour government 0.185483 0.462122\nour workers 0.199704 0.210989\nher family 0.540887 0.405552\nour time 0.510930 0.410058\n...\n'''\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBefore we construct the plot, let's some helper variables Since the aggregate TextRank scores aren't 
particularly\ninterpretable, we'll display the per-category rank of each score in the \u003ccode\u003emetadata_description\u003c/code\u003e field. These will be\ndisplayed after a term is clicked.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"term_ranks = pd.DataFrame(\n np.argsort(np.argsort(-term_category_scores, axis=0), axis=0) + 1,\n columns=term_category_scores.columns,\n index=term_category_scores.index)\n\nmetadata_descriptions = {\n term: '\u0026lt;br/\u0026gt;' + '\u0026lt;br/\u0026gt;'.join(\n '\u0026lt;b\u0026gt;%s\u0026lt;/b\u0026gt; TextRank score rank: %s/%s' % (cat, term_ranks.loc[term, cat], corpus.get_num_metadata())\n for cat in corpus.get_categories())\n for term in corpus.get_metadata()\n}\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003eterm_ranks = pd.DataFrame(\n np.argsort(np.argsort(-term_category_scores, axis=0), axis=0) + 1,\n columns=term_category_scores.columns,\n index=term_category_scores.index)\n\nmetadata_descriptions = {\n term: '\u0026lt;br/\u0026gt;' + '\u0026lt;br/\u0026gt;'.join(\n '\u0026lt;b\u0026gt;%s\u0026lt;/b\u0026gt; TextRank score rank: %s/%s' % (cat, term_ranks.loc[term, cat], corpus.get_num_metadata())\n for cat in corpus.get_categories())\n for term in corpus.get_metadata()\n}\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe can construct term scores in a couple ways. One is a standard dense-rank difference, a score which is used in most\nof the two-category contrastive plots here, which will give us the most category-associated phrases. Another is to use\nthe maximum category-specific score, this will give us the most prominent phrases in each category, regardless of the\nprominence in the other category. 
We'll take both approaches in this tutorial. Let's compute the second kind of score,\nthe category-specific prominence, below.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"category_specific_prominence = term_category_scores.apply(\n lambda r: r.Democratic if r.Democratic \u0026gt; r.Republican else -r.Republican,\n axis=1\n)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003ecategory_specific_prominence = term_category_scores.apply(\n lambda r: r.Democratic if r.Democratic \u0026gt; r.Republican else -r.Republican,\n axis=1\n)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNow we're ready to output this chart. Note that we use a \u003ccode\u003edense_rank\u003c/code\u003e transform, which places identically scaled phrases\natop each other. We use \u003ccode\u003ecategory_specific_prominence\u003c/code\u003e as scores, and set \u003ccode\u003esort_by_dist\u003c/code\u003e as \u003ccode\u003eFalse\u003c/code\u003e to ensure the\nphrases displayed on the right-hand side of the chart are ranked by the scores and not distance to the upper-left or\nlower-right corners. Since matching phrases are treated as non-text features, we encode them as single-phrase topic\nmodels and set the \u003ccode\u003etopic_model_preview_size\u003c/code\u003e to \u003ccode\u003e0\u003c/code\u003e to indicate the topic model list shouldn't be shown. Finally,\nwe set \u003ccode\u003euse_full_doc\u003c/code\u003e to \u003ccode\u003eTrue\u003c/code\u003e to ensure the full documents are displayed. 
Note the documents will be displayed in order of phrase-specific score.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"html = produce_scattertext_explorer(\n corpus,\n category='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n transform=dense_rank,\n metadata=corpus.get_df()['speaker'],\n scores=category_specific_prominence,\n sort_by_dist=False,\n use_non_text_features=True,\n topic_model_term_lists={term: [term] for term in corpus.get_metadata()},\n topic_model_preview_size=0,\n metadata_descriptions=metadata_descriptions,\n use_full_doc=True\n)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003ehtml = produce_scattertext_explorer(\n corpus,\n category='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n transform=dense_rank,\n metadata=corpus.get_df()['speaker'],\n scores=category_specific_prominence,\n sort_by_dist=False,\n use_non_text_features=True,\n topic_model_term_lists={term: [term] for term in corpus.get_metadata()},\n topic_model_preview_size=0,\n metadata_descriptions=metadata_descriptions,\n use_full_doc=True\n)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/PyTextRankProminenceScore.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/cdc5b90c80123d0b082ac7dfd01ab14a888a8268dbcf18f7fe8c6b4d4e2056b4/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f50795465787452616e6b50726f6d696e656e63652e706e67\" alt=\"PyTextRankProminenceScore.html\" data-canonical-src=\"https://jasonkessler.github.io/PyTextRankProminence.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe most associated terms in each category make some 
sense, at least on a post hoc analysis. When referring to (then)\nGovernor Romney, Democrats used his surname \"Romney\" in their most central mentions of him, while Republicans used the\nmore familiar and humanizing \"Mitt\". In terms of President Obama, the phrase \"Obama\" didn't show up as a top term\nin either, but the first name \"Barack\" was one of the most central phrases in Democratic speeches,\nmirroring \"Mitt.\"\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAlternatively, we can use the Dense Rank Difference in scores to color phrase-points and determine the top phrases to be\ndisplayed on the right-hand side of the chart. Instead of setting \u003ccode\u003escores\u003c/code\u003e as category-specific prominence scores,\nwe set \u003ccode\u003eterm_scorer=RankDifference()\u003c/code\u003e to inject a way of determining term scores into the scatter plot creation process.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"html = produce_scattertext_explorer(\n corpus,\n category='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n transform=dense_rank,\n use_non_text_features=True,\n metadata=corpus.get_df()['speaker'],\n term_scorer=RankDifference(),\n sort_by_dist=False,\n topic_model_term_lists={term: [term] for term in corpus.get_metadata()},\n topic_model_preview_size=0, \n metadata_descriptions=metadata_descriptions,\n use_full_doc=True\n)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003ehtml = produce_scattertext_explorer(\n corpus,\n category='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n transform=dense_rank,\n use_non_text_features=True,\n metadata=corpus.get_df()['speaker'],\n term_scorer=RankDifference(),\n sort_by_dist=False,\n topic_model_term_lists={term: 
[term] for term in corpus.get_metadata()},\n topic_model_preview_size=0, \n metadata_descriptions=metadata_descriptions,\n use_full_doc=True\n)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/PyTextRankRankDiff.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/de3e3d784bb952e829f6193edff19c9b5cc516c5c2193ff5554745da50e01541/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f50795465787452616e6b52616e6b446966662e706e67\" alt=\"PyTextRankRankDiff.html\" data-canonical-src=\"https://jasonkessler.github.io/PyTextRankRankDiff.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch4 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Phrasemachine to find phrases.\u003c/h4\u003e\u003ca id=\"user-content-using-phrasemachine-to-find-phrases\" class=\"anchor\" aria-label=\"Permalink: Using Phrasemachine to find phrases.\" href=\"#using-phrasemachine-to-find-phrases\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003ePhrasemachine from \u003ca href=\"https://github.com/AbeHandler\"\u003eAbeHandler\u003c/a\u003e (Handler et al. 
2016) uses regular expressions over\nsequences of part-of-speech tags to identify noun phrases. This has the advantage over using spaCy's NP-chunking\nin that it tends to isolote meaningful, large noun phases which are free of appositives.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eA opposed to PyTextRank, we'll just use counts of these phrases, treating them like any other term.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"import spacy\nfrom scattertext import SampleCorpora, PhraseMachinePhrases, dense_rank, RankDifference, AssociationCompactor, produce_scattertext_explorer\nfrom scattertext.CorpusFromPandas import CorpusFromPandas\n\ncorpus = (CorpusFromPandas(SampleCorpora.ConventionData2012.get_data(),\n category_col='party',\n text_col='text',\n feats_from_spacy_doc=PhraseMachinePhrases(),\n nlp=spacy.load('en', parser=False))\n .build().compact(AssociationCompactor(4000)))\n\nhtml = produce_scattertext_explorer(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n transform=dense_rank,\n metadata=corpus.get_df()['speaker'],\n term_scorer=RankDifference(),\n width_in_pixels=1000)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003eimport spacy\nfrom scattertext import SampleCorpora, PhraseMachinePhrases, dense_rank, RankDifference, AssociationCompactor, produce_scattertext_explorer\nfrom scattertext.CorpusFromPandas import CorpusFromPandas\n\ncorpus = (CorpusFromPandas(SampleCorpora.ConventionData2012.get_data(),\n category_col='party',\n text_col='text',\n feats_from_spacy_doc=PhraseMachinePhrases(),\n nlp=spacy.load('en', parser=False))\n .build().compact(AssociationCompactor(4000)))\n\nhtml = produce_scattertext_explorer(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n 
minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n transform=dense_rank,\n metadata=corpus.get_df()['speaker'],\n term_scorer=RankDifference(),\n width_in_pixels=1000)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/Phrasemachine.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/c04db4cd5b227ebf5d77dd556a4b3f99cd177852a9d865e18f7fd201bf9e670c/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f5068726173654d616368696e652e706e67\" alt=\"Phrasemachine.html\" data-canonical-src=\"https://jasonkessler.github.io/PhraseMachine.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAdding color gradients to explain scores\u003c/h3\u003e\u003ca id=\"user-content-adding-color-gradients-to-explain-scores\" class=\"anchor\" aria-label=\"Permalink: Adding color gradients to explain scores\" href=\"#adding-color-gradients-to-explain-scores\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIn Scattertext, various metrics, including term associations, are often shown through two ways. 
The first,\nand most important, is the position in the chart. The second is the color of a point or text. In Scattertext 0.2.21, a\nway of visualizing the semantics of these scores is introduced: the gradient as key.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe gradient, by default, follows the \u003ccode\u003ed3_color_scale\u003c/code\u003e parameter of \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e which is\n\u003ccode\u003ed3.interpolateRdYlBu\u003c/code\u003e by default.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe following additional parameters to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e (and similar functions) allow for the manipulation of\ngradients.\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ccode\u003einclude_gradient: bool\u003c/code\u003e (\u003ccode\u003eFalse\u003c/code\u003e by default) is a flag that triggers the appearance of a gradient.\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eleft_gradient_term: Optional[str]\u003c/code\u003e indicates the text written on the far-left side of the gradient. It is written in \u003ccode\u003egradient_text_color\u003c/code\u003e and is \u003ccode\u003ecategory_name\u003c/code\u003e by default.\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eright_gradient_term: Optional[str]\u003c/code\u003e indicates the text written on the far-right side of the gradient. It is written in \u003ccode\u003egradient_text_color\u003c/code\u003e and is \u003ccode\u003enot_category_name\u003c/code\u003e by default.\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003emiddle_gradient_term: Optional[str]\u003c/code\u003e indicates the text written in the middle of the gradient. It is the opposite color of the center gradient color and is empty by default.\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003egradient_text_color: Optional[str]\u003c/code\u003e indicates the fixed color of the text written on the gradient. 
If None, it defaults to the opposite color of the gradient.\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eleft_text_color: Optional[str]\u003c/code\u003e overrides \u003ccode\u003egradient_text_color\u003c/code\u003e for the left gradient term\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003emiddle_text_color: Optional[str]\u003c/code\u003e overrides \u003ccode\u003egradient_text_color\u003c/code\u003e for the middle gradient term\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eright_text_color: Optional[str]\u003c/code\u003e overrides \u003ccode\u003egradient_text_color\u003c/code\u003e for the right gradient term\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003egradient_colors: Optional[List[str]]\u003c/code\u003e list of hex colors, including '#', (e.g., \u003ccode\u003e['#0000ff', '#980067', '#cc3300', '#32cd00']\u003c/code\u003e) which describe the gradient. If given, these override \u003ccode\u003ed3_color_scale\u003c/code\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp dir=\"auto\"\u003eA straightforward example is as follows. Term colors are defined as a mapping between a term name and a \u003ccode\u003e#RRGGBB\u003c/code\u003e color\nas part of the \u003ccode\u003eterm_colors\u003c/code\u003e parameter, and the color gradient is defined in \u003ccode\u003egradient_colors\u003c/code\u003e. 
THe\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\nimport matplotlib.pyplot as plt\nimport matplotlib as mpl\n\ndf = st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n)\n\ncorpus = st.CorpusFromParsedDocuments(\n df, category_col='party', parsed_col='parse'\n).build().get_unigram_corpus().compact(st.AssociationCompactor(2000))\n\nhtml = st.produce_scattertext_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n metadata=corpus.get_df()['speaker'],\n transform=st.Scalers.dense_rank,\n include_gradient=True,\n left_gradient_term=\u0026quot;More Democratic\u0026quot;,\n right_gradient_term=\u0026quot;More Republican\u0026quot;,\n middle_gradient_term='Metric: Dense Rank Difference',\n gradient_text_color=\u0026quot;white\u0026quot;,\n term_colors=dict(zip(\n corpus.get_terms(),\n [\n mpl.colors.to_hex(x) for x in plt.get_cmap('brg')(\n st.Scalers.scale_center_zero_abs(\n st.RankDifferenceScorer(corpus).set_categories('democrat').get_scores()).values\n )\n ]\n )),\n gradient_colors=[mpl.colors.to_hex(x) for x in plt.get_cmap('brg')(np.arange(1., 0., -0.01))],\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ematplotlib\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003epyplot\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eplt\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ematplotlib\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003empl\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003eparse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n)\n\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003ecompact\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eAssociationCompactor\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e2000\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eminimum_term_frequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003epmi_threshold_coefficient\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003etransform\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edense_rank\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003einclude_gradient\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003eleft_gradient_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"More Democratic\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eright_gradient_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"More Republican\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emiddle_gradient_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Metric: Dense Rank Difference'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003egradient_text_color\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"white\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eterm_colors\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-en\"\u003edict\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003ezip\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e(),\n [\n \u003cspan class=\"pl-s1\"\u003empl\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecolors\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eto_hex\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e) \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eplt\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_cmap\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'brg'\u003c/span\u003e)(\n \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003escale_center_zero_abs\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDifferenceScorer\u003c/span\u003e(\u003cspan 
class=\"pl-s1\"\u003ecorpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eset_categories\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eget_scores\u003c/span\u003e()).\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e\n )\n ]\n )),\n \u003cspan class=\"pl-s1\"\u003egradient_colors\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003empl\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecolors\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eto_hex\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e) \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eplt\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_cmap\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'brg'\u003c/span\u003e)(\u003cspan class=\"pl-s1\"\u003enp\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003earange\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e1.\u003c/span\u003e, \u003cspan class=\"pl-c1\"\u003e0.\u003c/span\u003e, \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0.01\u003c/span\u003e))],\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_gradient.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/cccbc78f7e640953b8630142686836e34945ea78c6b5575c4180613191989b70/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f6772616469656e742e706e67\" alt=\"demo_gradient.html\" data-canonical-src=\"https://jasonkessler.github.io/gradient.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing Empath topics and categories\u003c/h3\u003e\u003ca 
id=\"user-content-visualizing-empath-topics-and-categories\" class=\"anchor\" aria-label=\"Permalink: Visualizing Empath topics and categories\" href=\"#visualizing-empath-topics-and-categories\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIn order to visualize Empath (Fast et al., 2016) topics and categories instead of terms, we'll need to\ncreate a \u003ccode\u003eCorpus\u003c/code\u003e of extracted topics and categories rather than unigrams and\nbigrams. To do so, use the \u003ccode\u003eFeatsFromOnlyEmpath\u003c/code\u003e feature extractor. See the source code for\nexamples of how to make your own.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWhen creating the visualization, pass the \u003ccode\u003euse_non_text_features=True\u003c/code\u003e argument into\n\u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e. This will instruct it to use the labeled Empath\ntopics and categories instead of looking for terms. Since the documents returned\nwhen a topic or category label is clicked will be in order of the document-level\ncategory-association strength, setting \u003ccode\u003euse_full_doc=True\u003c/code\u003e makes sense, unless you have\nenormous documents. 
Otherwise, the first 300 characters will be shown.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e(New in 0.0.26). Ensure you include \u003ccode\u003etopic_model_term_lists=feat_builder.get_top_model_term_lists()\u003c/code\u003e\nin \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e to ensure it bolds passages of snippets that match the\ntopic model.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; feat_builder = st.FeatsFromOnlyEmpath()\n\u0026gt;\u0026gt;\u0026gt; empath_corpus = st.CorpusFromParsedDocuments(convention_df,\n... category_col='party',\n... feats_from_spacy_doc=feat_builder,\n... parsed_col='text').build()\n\u0026gt;\u0026gt;\u0026gt; html = st.produce_scattertext_explorer(empath_corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'],\n... use_non_text_features=True,\n... use_full_doc=True,\n... topic_model_term_lists=feat_builder.get_top_model_term_lists())\n\u0026gt;\u0026gt;\u0026gt; open(\u0026quot;Convention-Visualization-Empath.html\u0026quot;, 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; feat_builder = st.FeatsFromOnlyEmpath()\n\u0026gt;\u0026gt;\u0026gt; empath_corpus = st.CorpusFromParsedDocuments(convention_df,\n... category_col='party',\n... feats_from_spacy_doc=feat_builder,\n... parsed_col='text').build()\n\u0026gt;\u0026gt;\u0026gt; html = st.produce_scattertext_explorer(empath_corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'],\n... use_non_text_features=True,\n... use_full_doc=True,\n... 
topic_model_term_lists=feat_builder.get_top_model_term_lists())\n\u0026gt;\u0026gt;\u0026gt; open(\"Convention-Visualization-Empath.html\", 'wb').write(html.encode('utf-8'))\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/Convention-Visualization-Empath.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/1a7b7b0250f763a20fed2a0fda35e9c20f2b1ffc01eec2b1f228ab29058e29cd/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e2d56697375616c697a6174696f6e2d456d706174682e706e67\" alt=\"Convention-Visualization-Empath.html\" data-canonical-src=\"https://jasonkessler.github.io/Convention-Visualization-Empath.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eScattertext also includes a feature builder to explore the relationship between General Inquirer Tag Categories\nand Document Categories. We'll use a slightly different approach, looking at the relationship of GI Tag Categories to\npolitical parties by using the\nZ-Scores of the Log-Odds-Ratio with Uninformative Dirichlet Priors (Monroe 2008). 
We'll use\nthe \u003ccode\u003eproduce_frequency_explorer\u003c/code\u003e plot\nvariation to visualize this relationship, setting the x-axis as the number of times a word in the tag category occurs,\nand the y-axis as the z-score.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFor more information on the General Inquirer, please see\nthe \u003ca href=\"http://www.wjh.harvard.edu/~inquirer/\" rel=\"nofollow\"\u003eGeneral Inquirer Home Page\u003c/a\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe'll use the same data set as before, except we'll use the \u003ccode\u003eFeatsFromGeneralInquirer\u003c/code\u003e feature builder.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; general_inquirer_feature_builder = st.FeatsFromGeneralInquirer()\n\u0026gt;\u0026gt;\u0026gt; corpus = st.CorpusFromPandas(convention_df,\n... category_col='party',\n... text_col='text',\n... nlp=st.whitespace_nlp_with_sentences,\n... feats_from_spacy_doc=general_inquirer_feature_builder).build()\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; general_inquirer_feature_builder = st.FeatsFromGeneralInquirer()\n\u0026gt;\u0026gt;\u0026gt; corpus = st.CorpusFromPandas(convention_df,\n... category_col='party',\n... text_col='text',\n... nlp=st.whitespace_nlp_with_sentences,\n... feats_from_spacy_doc=general_inquirer_feature_builder).build()\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we'll call \u003ccode\u003eproduce_frequency_explorer\u003c/code\u003e in a similar way we called \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e in the previous\nsection.\nThere are a few differences, however. First, we specify the \u003ccode\u003eLogOddsRatioUninformativeDirichletPrior\u003c/code\u003e term scorer, which\nscores the relationships between the categories. 
The \u003ccode\u003egrey_threshold\u003c/code\u003e indicates the points scoring between [-1.96, 1.96]\n(i.e., p \u0026gt; 0.05) should be colored gray. The\nargument \u003ccode\u003emetadata_descriptions=general_inquirer_feature_builder.get_definitions()\u003c/code\u003e\nindicates that a dictionary mapping the tag name to a string definition is passed. When a tag is clicked, the definition\nin the dictionary will be shown below the plot, as shown in the image following the snippet.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; html = st.produce_frequency_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... metadata=convention_df['speaker'],\n... use_non_text_features=True,\n... use_full_doc=True,\n... term_scorer=st.LogOddsRatioUninformativeDirichletPrior(),\n... grey_threshold=1.96,\n... width_in_pixels=1000,\n... topic_model_term_lists=general_inquirer_feature_builder.get_top_model_term_lists(),\n... metadata_descriptions=general_inquirer_feature_builder.get_definitions())\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; html = st.produce_frequency_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... metadata=convention_df['speaker'],\n... use_non_text_features=True,\n... use_full_doc=True,\n... term_scorer=st.LogOddsRatioUninformativeDirichletPrior(),\n... grey_threshold=1.96,\n... width_in_pixels=1000,\n... topic_model_term_lists=general_inquirer_feature_builder.get_top_model_term_lists(),\n... 
metadata_descriptions=general_inquirer_feature_builder.get_definitions())\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eHere's the resulting chart.\u003cbr\u003e\n\u003ca href=\"https://jasonkessler.github.io/demo_general_inquirer_frequency_plot.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/1bb4c4ae64b1b539244f3936a8960293398eb7b0a106c81dda8c5db745ec072c/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f67656e6572616c5f696e7175697265722e706e67\" alt=\"demo_general_inquirer_frequency_plot.html\" data-canonical-src=\"https://jasonkessler.github.io/general_inquirer.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_general_inquirer_frequency_plot.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/d32a10b53f6e165909ada541eb99ba108a9de67d647a5544e2ea2357dcd3479f/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f67656e6572616c5f696e717569726572322e706e67\" alt=\"demo_general_inquirer_frequency_plot.html\" data-canonical-src=\"https://jasonkessler.github.io/general_inquirer2.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing the Moral Foundations 2.0 Dictionary\u003c/h3\u003e\u003ca id=\"user-content-visualizing-the-moral-foundations-20-dictionary\" class=\"anchor\" aria-label=\"Permalink: Visualizing the Moral Foundations 2.0 Dictionary\" href=\"#visualizing-the-moral-foundations-20-dictionary\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 
2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe \u003ca href=\"https://moralfoundations.org/\" rel=\"nofollow\"\u003eMoral Foundations Theory\u003c/a\u003e proposes six psychological constructs\nas building blocks of moral thinking, as described in Graham et al. (2013). These foundations are,\nas described on \u003ca href=\"https://moralfoundations.org/\" rel=\"nofollow\"\u003emoralfoundations.org\u003c/a\u003e: care/harm, fairness/cheating, loyalty/betrayal,\nauthority/subversion, sanctity/degradation, and liberty/oppression. Please see the site for a more in-depth discussion\nof these foundations.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFrimer et al. (2019) created the Moral Foundations Dictionary 2.0, a lexicon of terms which invoke a moral foundation\nas a virtue (favorable toward the foundation) or a vice (in opposition to the foundation).\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThis dictionary can be used in the same way as the General Inquirer. 
In this example, we can plot the Cohen's d scores\nof\nfoundation-word counts relative to the frequencies with which words involving those foundations were invoked.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can first load the corpus as normal, and use \u003ccode\u003est.FeatsFromMoralFoundationsDictionary()\u003c/code\u003e to extract features.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\n\nconvention_df = st.SampleCorpora.ConventionData2012.get_data()\nmoral_foundations_feats = st.FeatsFromMoralFoundationsDictionary()\ncorpus = st.CorpusFromPandas(convention_df,\n category_col='party',\n text_col='text',\n nlp=st.whitespace_nlp_with_sentences,\n feats_from_spacy_doc=moral_foundations_feats).build()\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003emoral_foundations_feats\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFeatsFromMoralFoundationsDictionary\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromPandas\u003c/span\u003e(\u003cspan 
class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etext_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003efeats_from_spacy_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003emoral_foundations_feats\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, let's use Cohen's d term scorer to analyze the corpus, and describe a set of Cohen's d association scores.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"cohens_d_scorer = st.CohensD(corpus).use_metadata()\nterm_scorer = cohens_d_scorer.set_categories('democrat', ['republican']).term_scorer.get_score_df()\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecohens_d_scorer\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCohensD\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003euse_metadata\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecohens_d_scorer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eset_categories\u003c/span\u003e(\u003cspan 
class=\"pl-s\"\u003e'democrat'\u003c/span\u003e, [\u003cspan class=\"pl-s\"\u003e'republican'\u003c/span\u003e]).\u003cspan class=\"pl-c1\"\u003eterm_scorer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_score_df\u003c/span\u003e()\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWhich yields the following data frame:\u003c/p\u003e\n\u003cmarkdown-accessiblity-table\u003e\u003ctable\u003e\n\u003cthead\u003e\n\u003ctr\u003e\n\u003cth align=\"left\"\u003e\u003c/th\u003e\n\u003cth align=\"right\"\u003ecohens_d\u003c/th\u003e\n\u003cth align=\"right\"\u003ecohens_d_se\u003c/th\u003e\n\u003cth align=\"right\"\u003ecohens_d_z\u003c/th\u003e\n\u003cth align=\"right\"\u003ecohens_d_p\u003c/th\u003e\n\u003cth align=\"right\"\u003ehedges_g\u003c/th\u003e\n\u003cth align=\"right\"\u003ehedges_g_se\u003c/th\u003e\n\u003cth align=\"right\"\u003ehedges_g_z\u003c/th\u003e\n\u003cth align=\"right\"\u003ehedges_g_p\u003c/th\u003e\n\u003cth align=\"right\"\u003em1\u003c/th\u003e\n\u003cth align=\"right\"\u003em2\u003c/th\u003e\n\u003cth align=\"right\"\u003ecount1\u003c/th\u003e\n\u003cth align=\"right\"\u003ecount2\u003c/th\u003e\n\u003cth align=\"right\"\u003edocs1\u003c/th\u003e\n\u003cth align=\"right\"\u003edocs2\u003c/th\u003e\n\u003c/tr\u003e\n\u003c/thead\u003e\n\u003ctbody\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003ecare.virtue\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.662891\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.149425\u003c/td\u003e\n\u003ctd align=\"right\"\u003e4.43629\u003c/td\u003e\n\u003ctd align=\"right\"\u003e4.57621e-06\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.660257\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.159049\u003c/td\u003e\n\u003ctd align=\"right\"\u003e4.15129\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.65302e-05\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.195049\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.12164\u003c/td\u003e\n\u003ctd align=\"right\"\u003e760\u003c/td\u003e\n\u003ctd 
align=\"right\"\u003e379\u003c/td\u003e\n\u003ctd align=\"right\"\u003e115\u003c/td\u003e\n\u003ctd align=\"right\"\u003e54\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003ecare.vice\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.24435\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.146025\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.67335\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0471292\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.243379\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.152654\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.59432\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0554325\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0580005\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0428358\u003c/td\u003e\n\u003ctd align=\"right\"\u003e244\u003c/td\u003e\n\u003ctd align=\"right\"\u003e121\u003c/td\u003e\n\u003ctd align=\"right\"\u003e80\u003c/td\u003e\n\u003ctd align=\"right\"\u003e41\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003efairness.virtue\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.176794\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.145767\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.21286\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.112592\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.176092\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.152164\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.15725\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.123586\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0502469\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0403369\u003c/td\u003e\n\u003ctd align=\"right\"\u003e225\u003c/td\u003e\n\u003ctd align=\"right\"\u003e107\u003c/td\u003e\n\u003ctd align=\"right\"\u003e71\u003c/td\u003e\n\u003ctd align=\"right\"\u003e39\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003efairness.vice\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0707162\u003c/td\u003e\n\u003ctd 
align=\"right\"\u003e0.145528\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.485928\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.313509\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0704352\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.151711\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.464273\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.321226\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00718627\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00573227\u003c/td\u003e\n\u003ctd align=\"right\"\u003e32\u003c/td\u003e\n\u003ctd align=\"right\"\u003e14\u003c/td\u003e\n\u003ctd align=\"right\"\u003e21\u003c/td\u003e\n\u003ctd align=\"right\"\u003e10\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eauthority.virtue\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.0187793\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.145486\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.12908\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.551353\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.0187047\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.15163\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.123357\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.549088\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.358192\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.361191\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1281\u003c/td\u003e\n\u003ctd align=\"right\"\u003e788\u003c/td\u003e\n\u003ctd align=\"right\"\u003e122\u003c/td\u003e\n\u003ctd align=\"right\"\u003e66\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eauthority.vice\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.0354164\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.145494\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.243422\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.596161\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.0352757\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.151646\u003c/td\u003e\n\u003ctd 
align=\"right\"\u003e-0.232619\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.591971\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00353465\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00390602\u003c/td\u003e\n\u003ctd align=\"right\"\u003e20\u003c/td\u003e\n\u003ctd align=\"right\"\u003e14\u003c/td\u003e\n\u003ctd align=\"right\"\u003e14\u003c/td\u003e\n\u003ctd align=\"right\"\u003e10\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003esanctity.virtue\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.512145\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.147848\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-3.46399\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.999734\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.51011\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.156098\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-3.26788\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.999458\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0587987\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.101677\u003c/td\u003e\n\u003ctd align=\"right\"\u003e265\u003c/td\u003e\n\u003ctd align=\"right\"\u003e309\u003c/td\u003e\n\u003ctd align=\"right\"\u003e74\u003c/td\u003e\n\u003ctd align=\"right\"\u003e48\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003esanctity.vice\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.108011\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.145589\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.74189\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.770923\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.107582\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.151826\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.708585\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.760709\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00845048\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0109339\u003c/td\u003e\n\u003ctd align=\"right\"\u003e35\u003c/td\u003e\n\u003ctd 
align=\"right\"\u003e28\u003c/td\u003e\n\u003ctd align=\"right\"\u003e23\u003c/td\u003e\n\u003ctd align=\"right\"\u003e20\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eloyalty.virtue\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.413696\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.147031\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-2.81367\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.997551\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.412052\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.154558\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-2.666\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.996162\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.259296\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.309776\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1056\u003c/td\u003e\n\u003ctd align=\"right\"\u003e717\u003c/td\u003e\n\u003ctd align=\"right\"\u003e119\u003c/td\u003e\n\u003ctd align=\"right\"\u003e66\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eloyalty.vice\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.0854683\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.145549\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.587213\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.72147\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.0851287\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.151751\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-0.560978\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.712594\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00124518\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.00197022\u003c/td\u003e\n\u003ctd align=\"right\"\u003e5\u003c/td\u003e\n\u003ctd align=\"right\"\u003e5\u003c/td\u003e\n\u003ctd align=\"right\"\u003e5\u003c/td\u003e\n\u003ctd align=\"right\"\u003e4\u003c/td\u003e\n\u003c/tr\u003e\n\u003c/tbody\u003e\n\u003c/table\u003e\u003c/markdown-accessiblity-table\u003e\n\u003cp dir=\"auto\"\u003eThis data frame gives us Cohen's d scores (and their 
standard errors and z-scores), Hedge's \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$g$\u003c/math-renderer\u003e scores (ditto),\nthe mean document-length normalized topic usage per category (where the in-focus category is m1 [in this case Democrats]\nand the out-of-focus category is m2), the raw number of words used for each topic (count1 and count2), and the number of\ndocuments\nin each category with the topic (docs1 and docs2).\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that Cohen's d is the difference of m1 and m2 divided by their pooled standard deviation.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNow, let's plot the d-scores of foundations vs. their frequencies.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_frequency_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n metadata=convention_df['speaker'],\n use_non_text_features=True,\n use_full_doc=True,\n term_scorer=st.CohensD(corpus).use_metadata(),\n grey_threshold=0,\n width_in_pixels=1000,\n topic_model_term_lists=moral_foundations_feats.get_top_model_term_lists(),\n metadata_descriptions=moral_foundations_feats.get_definitions()\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_frequency_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003euse_non_text_features\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_full_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCohensD\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003euse_metadata\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003egrey_threshold\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etopic_model_term_lists\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003emoral_foundations_feats\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_top_model_term_lists\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003emetadata_descriptions\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003emoral_foundations_feats\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_definitions\u003c/span\u003e()\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_moral_foundations.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/3f55bb37c0bfc87a805dae0d5dfe633ce0a9b8031397966ec39cc341b3444452/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f6d6f72616c5f666f756e646174696f6e732e706e67\" alt=\"demo_moral_foundations.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_moral_foundations.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eOrdering Terms by Corpus Characteristicness\u003c/h3\u003e\u003ca id=\"user-content-ordering-terms-by-corpus-characteristicness\" class=\"anchor\" aria-label=\"Permalink: Ordering Terms by Corpus Characteristicness\" href=\"#ordering-terms-by-corpus-characteristicness\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eOften the terms of most interest are ones that are characteristic to the corpus 
as a whole. These are terms which occur\nfrequently in all sets of documents being studied, but relatively infrequently compared to general term frequencies.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can produce a plot with a characteristic score on the x-axis and class-association scores on the y-axis using the\nfunction \u003ccode\u003eproduce_characteristic_explorer\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCorpus characteristicness is the difference in dense term ranks between the words in all of the documents in the study\nand a general English-language frequency list. See\nthis \u003ca href=\"http://nbviewer.jupyter.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Class-Association-Scores.ipynb\" rel=\"nofollow\"\u003eTalk on Term-Class Association Scores\u003c/a\u003e\nfor a more thorough explanation.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\n\ncorpus = (st.CorpusFromPandas(st.SampleCorpora.ConventionData2012.get_data(),\n category_col='party',\n text_col='text',\n nlp=st.whitespace_nlp_with_sentences)\n .build()\n .get_unigram_corpus()\n .compact(st.ClassPercentageCompactor(term_count=2,\n term_ranker=st.OncePerDocFrequencyRanker)))\nhtml = st.produce_characteristic_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n metadata=corpus.get_df()['speaker']\n)\nopen('demo_characteristic_chart.html', 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromPandas\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etext_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n .\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e()\n .\u003cspan class=\"pl-c1\"\u003ecompact\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eClassPercentageCompactor\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_count\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eterm_ranker\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eOncePerDocFrequencyRanker\u003c/span\u003e)))\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_characteristic_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e]\n)\n\u003cspan class=\"pl-en\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'demo_characteristic_chart.html'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'wb'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ewrite\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eencode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e))\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_characteristic_chart.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/d1636497bd0cbfa8b1845ebfc39c222b6d3de2739cce51f3dce23d434521bcab/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f63686172616374657269737469635f63686172742e706e67\" alt=\"demo_characteristic_chart.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_characteristic_chart.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eDocument-Based Scatterplots\u003c/h3\u003e\u003ca 
id=\"user-content-document-based-scatterplots\" class=\"anchor\" aria-label=\"Permalink: Document-Based Scatterplots\" href=\"#document-based-scatterplots\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIn addition to words, phrases and topics, we can make each point correspond to a document. Let's first create\na corpus object for the 2012 Conventions data set. 
This explanation follows \u003ccode\u003edemo_pca_documents.py\u003c/code\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import pandas as pd\nfrom sklearn.feature_extraction.text import TfidfTransformer\nimport scattertext as st\nfrom scipy.sparse.linalg import svds\n\nconvention_df = st.SampleCorpora.ConventionData2012.get_data()\nconvention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)\ncorpus = (st.CorpusFromParsedDocuments(convention_df,\n category_col='party',\n parsed_col='parse')\n .build()\n .get_stoplisted_unigram_corpus())\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epandas\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003efeature_extraction\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003etext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eTfidfTransformer\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escipy\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003esparse\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003elinalg\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esvds\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n .\u003cspan class=\"pl-c1\"\u003eget_stoplisted_unigram_corpus\u003c/span\u003e())\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, let's add the document names as meta data in the corpus object. The \u003ccode\u003eadd_doc_names_as_metadata\u003c/code\u003e function\ntakes an array of document names, and populates a new corpus' meta data with those names. 
If two documents have the\nsame name, it appends a number (starting with 1) to the name.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"corpus = corpus.add_doc_names_as_metadata(corpus.get_df()['speaker'])\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eadd_doc_names_as_metadata\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e])\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we find tf.idf scores for the corpus' term-document matrix, run sparse SVD, and add them to a projection\ndata frame, making the x and y-axes the first two singular values, and indexing it on the corpus' meta data, which\ncorresponds to the document names.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"embeddings = TfidfTransformer().fit_transform(corpus.get_term_doc_mat())\nu, s, vt = svds(embeddings, k=3, maxiter=20000, which='LM')\nprojection = pd.DataFrame({'term': corpus.get_metadata(), 'x': u.T[0], 'y': u.T[1]}).set_index('term')\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eembeddings\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eTfidfTransformer\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003efit_transform\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_term_doc_mat\u003c/span\u003e())\n\u003cspan class=\"pl-s1\"\u003eu\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003es\u003c/span\u003e, \u003cspan 
class=\"pl-s1\"\u003evt\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003esvds\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eembeddings\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ek\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e3\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003emaxiter\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e20000\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ewhich\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'LM'\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003eprojection\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eDataFrame\u003c/span\u003e({\u003cspan class=\"pl-s\"\u003e'term'\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_metadata\u003c/span\u003e(), \u003cspan class=\"pl-s\"\u003e'x'\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003eu\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eT\u003c/span\u003e[\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e], \u003cspan class=\"pl-s\"\u003e'y'\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003eu\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eT\u003c/span\u003e[\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e]}).\u003cspan class=\"pl-c1\"\u003eset_index\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'term'\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFinally, set scores as 1 for Democrats and 0 for Republicans, rendering Republican documents as red points and\nDemocratic documents as blue. 
For more on the \u003ccode\u003eproduce_pca_explorer\u003c/code\u003e function,\nsee \u003ca href=\"#using-svd-to-visualize-any-kind-of-word-embeddings\"\u003eUsing SVD to visualize any kind of word embeddings\u003c/a\u003e.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"category = 'democrat'\nscores = (corpus.get_category_ids() == corpus.get_categories().index(category)).astype(int)\nhtml = st.produce_pca_explorer(corpus,\n category=category,\n category_name='Democratic',\n not_category_name='Republican',\n metadata=convention_df['speaker'],\n width_in_pixels=1000,\n show_axes=False,\n use_non_text_features=True,\n use_full_doc=True,\n projection=projection,\n scores=scores,\n show_top_terms=False)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_category_ids\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e==\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_categories\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eindex\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e)).\u003cspan class=\"pl-c1\"\u003eastype\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eint\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_pca_explorer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eshow_axes\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_non_text_features\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_full_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eprojection\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eprojection\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eshow_top_terms\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eClick 
for an interactive version\n\u003ca href=\"https://jasonkessler.github.io/demo_pca_documents.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/05e45f9f4a72357764039f8ffc92260448a79f5050c9b6f8f1cab538d9108c0f/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f646f635f7063612e706e67\" alt=\"demo_pca_documents.html\" data-canonical-src=\"https://jasonkessler.github.io/doc_pca.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Cohen's d or Hedge's g to visualize effect size.\u003c/h3\u003e\u003ca id=\"user-content-using-cohens-d-or-hedges-g-to-visualize-effect-size\" class=\"anchor\" aria-label=\"Permalink: Using Cohen's d or Hedge's g to visualize effect size.\" href=\"#using-cohens-d-or-hedges-g-to-visualize-effect-size\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCohen's d is a popular metric used to measure effect size. 
The definitions of Cohen's d and Hedge's \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$g$\u003c/math-renderer\u003e\nfrom (Shinichi and Cuthill 2017) are implemented in Scattertext.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; convention_df = st.SampleCorpora.ConventionData2012.get_data()\n\u0026gt;\u0026gt;\u0026gt; corpus = (st.CorpusFromPandas(convention_df,\n... category_col='party',\n ...text_col='text',\n ...nlp=st.whitespace_nlp_with_sentences)\n....build()\n....get_unigram_corpus())\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromPandas\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n... 
\u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n ...\u003cspan class=\"pl-s1\"\u003etext_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e,\n ...\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n....\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n....\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e())\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe can create a term scorer object to examine the effect sizes and other metrics.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt; \u0026gt; term_scorer = st.CohensD(corpus).set_categories('democrat', ['republican'])\n\u0026gt;\u0026gt; \u0026gt; term_scorer.get_score_df().sort_values(by='cohens_d', ascending=False).head()\ncohens_d\ncohens_d_se\ncohens_d_z\ncohens_d_p\nhedges_g\nhedges_g_se\nhedges_g_z\nhedges_g_p\nm1\nm2\nobama\n1.187378\n0.024588\n48.290444\n0.000000e+00\n1.187322\n0.018419\n64.461363\n0.0\n0.007778\n0.002795\n\n\nclass 0.855859 0.020848 41.052045 0.000000e+00 0.855818 0.017227 49.677688 0.0 0.002222 0.000375\n\n\nmiddle\n0.826895\n0.020553\n40.232746\n0.000000e+00\n0.826857\n0.017138\n48.245626\n0.0\n0.002316\n0.000400\npresident\n0.820825\n0.020492\n40.056541\n0.000000e+00\n0.820786\n0.017120\n47.942661\n0.0\n0.010231\n0.005369\nbarack\n0.730624\n0.019616\n37.245725\n6.213052e-304\n0.730589\n0.016862\n43.327800\n0.0\n0.002547\n0.000725\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCohensD\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eset_categories\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e, [\u003cspan class=\"pl-s\"\u003e'republican'\u003c/span\u003e])\u003cspan class=\"pl-s1\"\u003e\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_score_df\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'cohens_d'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eascending\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ehead\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003ecohens_d\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ecohens_d_se\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ecohens_d_z\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ecohens_d_p\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehedges_g\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehedges_g_se\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehedges_g_z\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehedges_g_p\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003em1\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003em2\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eobama\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e1.187378\u003c/span\u003e\n\u003cspan 
class=\"pl-c1\"\u003e0.024588\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e48.290444\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.000000e+00\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e1.187322\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.018419\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e64.461363\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.007778\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.002795\u003c/span\u003e\n\n\n\u003cspan class=\"pl-k\"\u003eclass\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.855859\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.020848\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e41.052045\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.000000e+00\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.855818\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.017227\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e49.677688\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.002222\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.000375\u003c/span\u003e\n\n\n\u003cspan class=\"pl-s1\"\u003emiddle\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.826895\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.020553\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e40.232746\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.000000e+00\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.826857\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.017138\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e48.245626\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.002316\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.000400\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003epresident\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.820825\u003c/span\u003e\n\u003cspan 
class=\"pl-c1\"\u003e0.020492\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e40.056541\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.000000e+00\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.820786\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.017120\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e47.942661\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.010231\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.005369\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ebarack\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.730624\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.019616\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e37.245725\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e6.213052e-304\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.730589\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.016862\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e43.327800\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.002547\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.000725\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eOur calculation of Cohen's d is not directly based on term counts. Rather, we divide each document's term counts by the\ntotal number\nof terms in the document before calculating the statistics. \u003ccode\u003em1\u003c/code\u003e and \u003ccode\u003em2\u003c/code\u003e are, respectively the mean portions of words\nin speeches made by Democrats and Republicans that were the term in question. The effect size (\u003ccode\u003ecohens_d\u003c/code\u003e) is the\ndifference between these means divided by the pooled standard deviation. 
\u003ccode\u003ecohens_d_se\u003c/code\u003e is the standard error\nof the statistic, while \u003ccode\u003ecohens_d_z\u003c/code\u003e and \u003ccode\u003ecohens_d_p\u003c/code\u003e are the Z-scores and p-values indicating the statistical\nsignificance of the effect. Corresponding columns are present for Hedges' \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$g$\u003c/math-renderer\u003e, a version of Cohen's d adjusted for data set size.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; st.produce_frequency_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n term_scorer=st.CohensD(corpus),\n metadata=convention_df['speaker'],\n grey_threshold=0\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_frequency_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCohensD\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003egrey_threshold\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eClick for an interactive version.\n\u003ca href=\"https://jasonkessler.github.io/demo_cohens_d.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/5634036c343602f97e0cab4c656df9800f742efd05420fcb0352b86c28b3d773/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f636f68656e5f642e706e67\" alt=\"demo_cohens_d.html\" data-canonical-src=\"https://jasonkessler.github.io/cohen_d.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Cliff's Delta to visualize effect size\u003c/h3\u003e\u003ca id=\"user-content-using-cliffs-delta-to-visualize-effect-size\" class=\"anchor\" aria-label=\"Permalink: Using Cliff's Delta to visualize effect size\" href=\"#using-cliffs-delta-to-visualize-effect-size\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 
1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCliff's Delta (Cliff 1993) uses a non-parametric approach to computing effect size. In our setting, the term's frequency\npercentage of each document in the focus set is compared with that of the background set. For each pair of documents,\na score of 1 is given if the focus document's frequency percentage is larger than the background, 0 if identical, and -1\nif smaller. Note that this assumes document lengths are similarly distributed across the focus and background corpora.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSee [\u003ca href=\"https://real-statistics.com/non-parametric-tests/mann-whitney-test/cliffs-delta/\" rel=\"nofollow\"\u003ehttps://real-statistics.com/non-parametric-tests/mann-whitney-test/cliffs-delta/\u003c/a\u003e] for the formulas used in \u003ccode\u003eCliffsDelta\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eBelow is an example of how to use \u003ccode\u003eCliffsDelta\u003c/code\u003e to find and plot term scores:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"nlp = spacy.blank('en')\nnlp.add_pipe('sentencizer')\nconvention_df = st.SampleCorpora.ConventionData2012.get_data().assign(\n party = lambda df: df.party.apply(\n lambda x: {'democrat': 'Dem', 'republican': 'Rep'}[x]),\n SpacyParse=lambda df: df.text.progress_apply(nlp)\n)\ncorpus = st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='SpacyParse').build(\n).remove_terms_used_in_less_than_num_docs(10)\nst.CliffsDelta(corpus).set_categories('Dem').get_score_df().sort_values(by='Dem', ascending=False).iloc[:10]\"\u003e\u003cpre\u003e\u003cspan
class=\"pl-s1\"\u003enlp\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espacy\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eblank\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'en'\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eadd_pipe\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'sentencizer'\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003eparty\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eparty\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\n \u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e: {\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Dem'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'republican'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Rep'\u003c/span\u003e}[\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e]),\n \u003cspan class=\"pl-v\"\u003eSpacyParse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eprogress_apply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e)\n)\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'SpacyParse'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e(\n).\u003cspan class=\"pl-c1\"\u003eremove_terms_used_in_less_than_num_docs\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCliffsDelta\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eset_categories\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'Dem'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eget_score_df\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Dem'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eascending\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[:\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e]\u003c/pre\u003e\u003c/div\u003e\n\u003cmarkdown-accessiblity-table\u003e\u003ctable\u003e\n\u003cthead\u003e\n\u003ctr\u003e\n\u003cth align=\"left\"\u003eterm\u003c/th\u003e\n\u003cth 
align=\"right\"\u003eMetric\u003c/th\u003e\n\u003cth align=\"right\"\u003eStddev\u003c/th\u003e\n\u003cth align=\"right\"\u003eLow-5.0% CI\u003c/th\u003e\n\u003cth align=\"right\"\u003eHigh-5.0% CI\u003c/th\u003e\n\u003cth align=\"right\"\u003eTermCount1\u003c/th\u003e\n\u003cth align=\"right\"\u003eTermCount2\u003c/th\u003e\n\u003cth align=\"right\"\u003eDocCount1\u003c/th\u003e\n\u003cth align=\"right\"\u003eDocCount2\u003c/th\u003e\n\u003c/tr\u003e\n\u003c/thead\u003e\n\u003ctbody\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eobama\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.597191\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0266606\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-1.35507\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-1.03477\u003c/td\u003e\n\u003ctd align=\"right\"\u003e537\u003c/td\u003e\n\u003ctd align=\"right\"\u003e165\u003c/td\u003e\n\u003ctd align=\"right\"\u003e113\u003c/td\u003e\n\u003ctd align=\"right\"\u003e40\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003epresident obama\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.565903\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0314348\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-2.37978\u003c/td\u003e\n\u003ctd align=\"right\"\u003e-1.74131\u003c/td\u003e\n\u003ctd align=\"right\"\u003e351\u003c/td\u003e\n\u003ctd align=\"right\"\u003e78\u003c/td\u003e\n\u003ctd align=\"right\"\u003e100\u003c/td\u003e\n\u003ctd align=\"right\"\u003e30\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003epresident\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.426337\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0293418\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.22784\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.909226\u003c/td\u003e\n\u003ctd align=\"right\"\u003e740\u003c/td\u003e\n\u003ctd align=\"right\"\u003e301\u003c/td\u003e\n\u003ctd align=\"right\"\u003e113\u003c/td\u003e\n\u003ctd 
align=\"right\"\u003e53\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003emiddle\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.417591\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0267365\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.10791\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.840932\u003c/td\u003e\n\u003ctd align=\"right\"\u003e164\u003c/td\u003e\n\u003ctd align=\"right\"\u003e27\u003c/td\u003e\n\u003ctd align=\"right\"\u003e68\u003c/td\u003e\n\u003ctd align=\"right\"\u003e12\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eclass\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.415373\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0280622\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.09032\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.815649\u003c/td\u003e\n\u003ctd align=\"right\"\u003e161\u003c/td\u003e\n\u003ctd align=\"right\"\u003e25\u003c/td\u003e\n\u003ctd align=\"right\"\u003e69\u003c/td\u003e\n\u003ctd align=\"right\"\u003e14\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003ebarack\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.406997\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0281692\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1.00765\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.750963\u003c/td\u003e\n\u003ctd align=\"right\"\u003e202\u003c/td\u003e\n\u003ctd align=\"right\"\u003e46\u003c/td\u003e\n\u003ctd align=\"right\"\u003e76\u003c/td\u003e\n\u003ctd align=\"right\"\u003e16\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003ebarack obama\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.402562\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.027512\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.965359\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.723403\u003c/td\u003e\n\u003ctd align=\"right\"\u003e164\u003c/td\u003e\n\u003ctd align=\"right\"\u003e45\u003c/td\u003e\n\u003ctd 
align=\"right\"\u003e76\u003c/td\u003e\n\u003ctd align=\"right\"\u003e16\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003ethat 's\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.384085\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0227344\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.809747\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.634705\u003c/td\u003e\n\u003ctd align=\"right\"\u003e236\u003c/td\u003e\n\u003ctd align=\"right\"\u003e91\u003c/td\u003e\n\u003ctd align=\"right\"\u003e89\u003c/td\u003e\n\u003ctd align=\"right\"\u003e31\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003eobama .\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.356245\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0237453\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.664688\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.509631\u003c/td\u003e\n\u003ctd align=\"right\"\u003e70\u003c/td\u003e\n\u003ctd align=\"right\"\u003e5\u003c/td\u003e\n\u003ctd align=\"right\"\u003e49\u003c/td\u003e\n\u003ctd align=\"right\"\u003e4\u003c/td\u003e\n\u003c/tr\u003e\n\u003ctr\u003e\n\u003ctd align=\"left\"\u003efor\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.35526\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.0364138\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.70142\u003c/td\u003e\n\u003ctd align=\"right\"\u003e0.46487\u003c/td\u003e\n\u003ctd align=\"right\"\u003e1020\u003c/td\u003e\n\u003ctd align=\"right\"\u003e542\u003c/td\u003e\n\u003ctd align=\"right\"\u003e119\u003c/td\u003e\n\u003ctd align=\"right\"\u003e62\u003c/td\u003e\n\u003c/tr\u003e\n\u003c/tbody\u003e\n\u003c/table\u003e\u003c/markdown-accessiblity-table\u003e\n\u003cp dir=\"auto\"\u003eWe can elegantly display the Cliff's delta scores using \u003ccode\u003edataframe_scattertext\u003c/code\u003e, and describe the point coloring scheme\nusing the \u003ccode\u003einclude_gradient=True\u003c/code\u003e parameter. 
We set the \u003ccode\u003eleft_gradient_term\u003c/code\u003e, \u003ccode\u003emiddle_gradient_term\u003c/code\u003e, and \u003ccode\u003eright_gradient_term\u003c/code\u003e\nparameters to strings which will appear in their corresponding values.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"plot_df = st.CliffsDelta(\n corpus\n).set_categories(\n category_name='Dem'\n).get_score_df().rename(columns={'Metric': 'CliffsDelta'}).assign(\n Frequency=lambda df: df.TermCount1 + df.TermCount1,\n X=lambda df: df.Frequency,\n Y=lambda df: df.CliffsDelta,\n Xpos=lambda df: st.Scalers.dense_rank(df.X),\n Ypos=lambda df: st.Scalers.scale_center_zero_abs(df.Y),\n ColorScore=lambda df: df.Ypos,\n)\n\nhtml = st.dataframe_scattertext(\n corpus,\n plot_df=plot_df,\n category='Dem', \n category_name='Dem',\n not_category_name='Rep',\n width_in_pixels=1000, \n ignore_categories=False,\n metadata=lambda corpus: corpus.get_df()['speaker'],\n color_score_column='ColorScore',\n left_list_column='ColorScore',\n show_characteristic=False,\n y_label=\u0026quot;Cliff's Delta\u0026quot;,\n x_label='Frequency Ranks',\n y_axis_labels=[f'More Rep: delta={plot_df.CliffsDelta.max():.3f}',\n '',\n f'More Dem: delta={-plot_df.CliffsDelta.max():.3f}'],\n tooltip_columns=['Frequency', 'CliffsDelta'],\n term_description_columns=['CliffsDelta', 'Stddev', 'Low-95.0% CI', 'High-95.0% CI'],\n header_names={'upper': 'Top Dem', 'lower': 'Top Reps'},\n horizontal_line_y_position=0,\n include_gradient=True,\n left_gradient_term='More Republican',\n right_gradient_term='More Democratic',\n middle_gradient_term=\u0026quot;Metric: Cliff's Delta\u0026quot;,\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan
class=\"pl-c1\"\u003eCliffsDelta\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eset_categories\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Dem'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eget_score_df\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003erename\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecolumns\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e{\u003cspan class=\"pl-s\"\u003e'Metric'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'CliffsDelta'\u003c/span\u003e}).\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-v\"\u003eFrequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eTermCount1\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eTermCount1\u003c/span\u003e,\n \u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFrequency\u003c/span\u003e,\n \u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCliffsDelta\u003c/span\u003e,\n \u003cspan class=\"pl-v\"\u003eXpos\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edense_rank\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e),\n \u003cspan class=\"pl-v\"\u003eYpos\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003escale_center_zero_abs\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e),\n \u003cspan class=\"pl-v\"\u003eColorScore\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eYpos\u003c/span\u003e,\n)\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edataframe_scattertext\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Dem'\u003c/span\u003e, \n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-s\"\u003e'Dem'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Rep'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e, \n \u003cspan class=\"pl-s1\"\u003eignore_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003ecolor_score_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'ColorScore'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eleft_list_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'ColorScore'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eshow_characteristic\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"Cliff's Delta\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Frequency Ranks'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_axis_labels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003ef'More 
Rep: delta=\u003cspan class=\"pl-s1\"\u003e\u003cspan class=\"pl-kos\"\u003e{\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCliffsDelta\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e():.3f\u003cspan class=\"pl-kos\"\u003e}\u003c/span\u003e\u003c/span\u003e'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e''\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003ef'More Dem: delta=\u003cspan class=\"pl-s1\"\u003e\u003cspan class=\"pl-kos\"\u003e{\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCliffsDelta\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e():.3f\u003cspan class=\"pl-kos\"\u003e}\u003c/span\u003e\u003c/span\u003e'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003etooltip_columns\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Frequency'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'CliffsDelta'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eterm_description_columns\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'CliffsDelta'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Stddev'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Low-95.0% CI'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'High-95.0% CI'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eheader_names\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e{\u003cspan class=\"pl-s\"\u003e'upper'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Top Dem'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'lower'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Top Reps'\u003c/span\u003e},\n \u003cspan class=\"pl-s1\"\u003ehorizontal_line_y_position\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003einclude_gradient\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eleft_gradient_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'More Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eright_gradient_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'More Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emiddle_gradient_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"Metric: Cliff's Delta\"\u003c/span\u003e,\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_cliffs_delta.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/80f777c69f1230b1ba3538c2f03e57e2498b44d4f99593a64f525ea426dbe1f7/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f636c6966667364656c74612e706e67\" alt=\"demo_cliffs_delta.html\" data-canonical-src=\"https://jasonkessler.github.io/cliffsdelta.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Bi-Normal Separation (BNS) to score terms\u003c/h3\u003e\u003ca id=\"user-content-using-bi-normal-separation-bns-to-score-terms\" class=\"anchor\" aria-label=\"Permalink: Using Bi-Normal Separation (BNS) to score terms\" href=\"#using-bi-normal-separation-bns-to-score-terms\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 
1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBi-Normal Separation (BNS) (Forman, 2008) was added in version 0.1.8. A variation of (BNS) is used\nwhere \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$F^{-1}(tpr) - F^{-1}(fpr)$\u003c/math-renderer\u003e is not used as an absolute value, but kept as a difference. This allows for\nterms strongly indicative of true positives and false positives to have a high or low score.\nNote that tpr and fpr are scaled to between \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$[\\alpha, 1-\\alpha]$\u003c/math-renderer\u003e where\nalpha is \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$\\in [0, 1]$\u003c/math-renderer\u003e. In Forman (2008) and earlier literature \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$\\alpha=0.0005$\u003c/math-renderer\u003e. 
In personal correspondence with Forman,\nhe kindly suggested using \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$\\frac{1.}{\\mbox{minimum(positives, negatives)}}$\u003c/math-renderer\u003e. I have implemented this as\n\u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$\\alpha=\\frac{1.}{\\mbox{minimum documents in the least frequent category}}$\u003c/math-renderer\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"corpus = (st.CorpusFromPandas(convention_df,\n category_col='party',\n text_col='text',\n nlp=st.whitespace_nlp_with_sentences)\n .build()\n .get_unigram_corpus()\n .remove_infrequent_words(3, term_ranker=st.OncePerDocFrequencyRanker))\n\nterm_scorer = (st.BNSScorer(corpus).set_categories('democrat'))\nprint(term_scorer.get_score_df().sort_values(by='democrat BNS'))\n\nhtml = st.produce_frequency_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n scores=term_scorer.get_score_df()['democrat BNS'].reindex(corpus.get_terms()).values,\n metadata=lambda c: c.get_df()['speaker'],\n minimum_term_frequency=0,\n grey_threshold=0,\n y_label=f'Bi-normal Separation (alpha={term_scorer.prior_counts})'\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromPandas\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etext_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n .\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e()\n .\u003cspan class=\"pl-c1\"\u003eremove_infrequent_words\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e3\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eterm_ranker\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eOncePerDocFrequencyRanker\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eBNSScorer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eset_categories\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e))\n\u003cspan class=\"pl-en\"\u003eprint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_score_df\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat BNS'\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eproduce_frequency_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_score_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'democrat BNS'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003ereindex\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()).\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ec\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eminimum_term_frequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003egrey_threshold\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003ef'Bi-normal Separation (alpha=\u003cspan class=\"pl-s1\"\u003e\u003cspan class=\"pl-kos\"\u003e{\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eprior_counts\u003c/span\u003e\u003cspan class=\"pl-kos\"\u003e}\u003c/span\u003e\u003c/span\u003e)'\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBNS Scored terms using an algorithmically found alpha.\n\u003ca href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_bi_normal_separation.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_bi_normal_separation.png\" alt=\"BNS\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing correlations to explain classifiers\u003c/h3\u003e\u003ca id=\"user-content-using-correlations-to-explain-classifiers\" class=\"anchor\" aria-label=\"Permalink: Using correlations to explain classifiers\" href=\"#using-correlations-to-explain-classifiers\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe can train a classifier to produce 
a prediction score for each document. Often classifiers or regressors\nuse features which take into account features beyond the ones represented by Scattertext, be they n-gram, topic,\nextra-linguistic, neural, etc.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can use Scattertext to visualize the correlations between unigrams (or really any feature representation) and\nthe document scores produced by a model.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn the following example, we train a linear SVM using unigram and bi-gram features on the entire convention data set,\nand use the model to make a prediction on each document, and finally use Pearson's \u003cmath-renderer class=\"js-inline-math\" style=\"display: inline-block\" data-static-url=\"https://github.githubassets.com/static\" data-run-id=\"bf8025d7258a64274065d81987021fdd\"\u003e$r$\u003c/math-renderer\u003e to correlate unigram features\nto the distance from the SVM decision boundary.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"from sklearn.svm import LinearSVC\n\nimport scattertext as st\n\ndf = st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n)\n\ncorpus = st.CorpusFromParsedDocuments(\n df, category_col='party', parsed_col='parse'\n).build()\n\nX = corpus.get_term_doc_mat()\ny = corpus.get_category_ids()\n\nclf = LinearSVC()\nclf.fit(X=X, y=y == corpus.get_categories().index('democrat'))\ndoc_scores = clf.decision_function(X=X)\n\ncompactcorpus = corpus.get_unigram_corpus().compact(st.AssociationCompactor(2000))\n\nplot_df = st.Correlations().set_correlation_type(\n 'pearsonr'\n).get_correlation_df(\n corpus=compactcorpus,\n document_scores=doc_scores\n).reindex(compactcorpus.get_terms()).assign(\n X=lambda df: df.Frequency,\n Y=lambda df: df['r'],\n Xpos=lambda df: st.Scalers.dense_rank(df.X),\n Ypos=lambda df: 
st.Scalers.scale_center_zero_abs(df.Y),\n SuppressDisplay=False,\n ColorScore=lambda df: df.Ypos,\n)\n\nhtml = st.dataframe_scattertext(\n compactcorpus,\n plot_df=plot_df,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n width_in_pixels=1000,\n metadata=lambda c: c.get_df()['speaker'],\n unified_context=False,\n ignore_categories=False,\n color_score_column='ColorScore',\n left_list_column='ColorScore',\n y_label=\u0026quot;Pearson r (correlation to SVM document score)\u0026quot;,\n x_label='Frequency Ranks',\n header_names={'upper': 'Top Democratic',\n 'lower': 'Top Republican'},\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003esvm\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eLinearSVC\u003c/span\u003e\n\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003eparse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n)\n\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n\n\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_term_doc_mat\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003ey\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_category_ids\u003c/span\u003e()\n\n\u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eLinearSVC\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efit\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ey\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ey\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e==\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_categories\u003c/span\u003e().\u003cspan 
class=\"pl-c1\"\u003eindex\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e))\n\u003cspan class=\"pl-s1\"\u003edoc_scores\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edecision_function\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003ecompactcorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003ecompact\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eAssociationCompactor\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e2000\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorrelations\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eset_correlation_type\u003c/span\u003e(\n \u003cspan class=\"pl-s\"\u003e'pearsonr'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eget_correlation_df\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecompactcorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003edocument_scores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003edoc_scores\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ereindex\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecompactcorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()).\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan 
class=\"pl-c1\"\u003eX\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFrequency\u003c/span\u003e,\n \u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'r'\u003c/span\u003e],\n \u003cspan class=\"pl-v\"\u003eXpos\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edense_rank\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e),\n \u003cspan class=\"pl-v\"\u003eYpos\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003escale_center_zero_abs\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e),\n \u003cspan class=\"pl-v\"\u003eSuppressDisplay\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-v\"\u003eColorScore\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: 
\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eYpos\u003c/span\u003e,\n)\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edataframe_scattertext\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecompactcorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ec\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eunified_context\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eignore_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecolor_score_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'ColorScore'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eleft_list_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'ColorScore'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"Pearson r (correlation to SVM document score)\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Frequency Ranks'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eheader_names\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e{\u003cspan class=\"pl-s\"\u003e'upper'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Top Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'lower'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Top Republican'\u003c/span\u003e},\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/pearsons.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/pearsons.png\" alt=\"BNS\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Custom Background Word Frequencies\u003c/h3\u003e\u003ca id=\"user-content-using-custom-background-word-frequencies\" class=\"anchor\" aria-label=\"Permalink: Using Custom Background Word Frequencies\" href=\"#using-custom-background-word-frequencies\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" 
width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eScattertext relies on a set of general-domain English word frequencies when computing unigram characteristic\u003cbr\u003e\nscores. When running Scattertext on non-English data or in a specific domain, the quality of the scores\nwill degrade.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eEnsure that you are on Scattertext 0.1.6 or higher.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTo remedy this, one can add a custom set of background scores to a Corpus-like object,\nusing the \u003ccode\u003eCorpus.set_background_corpus\u003c/code\u003e function. 
The function takes a \u003ccode\u003epd.Series\u003c/code\u003e object, indexed on\nterms with numeric count values.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eBy default, \u003ca href=\"#understanding-scaled-f-score\"\u003eScaled F-Score\u003c/a\u003e is used to rank how characteristic\nterms are.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe example below illustrates using Polish background word frequencies.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, we produce a Series object mapping Polish words to their frequencies using a list from\nthe \u003ca href=\"https://github.com/oprogramador/most-common-words-by-language\"\u003emost-common-words-by-language\u003c/a\u003e repo.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"polish_word_frequencies = pd.read_csv(\n 'https://raw.githubusercontent.com/hermitdave/FrequencyWords/master/content/2016/pl/pl_50k.txt',\n sep=' ',\n names=['Word', 'Frequency']\n).set_index('Word')['Frequency']\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003epolish_word_frequencies\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eread_csv\u003c/span\u003e(\n \u003cspan class=\"pl-s\"\u003e'https://raw.githubusercontent.com/hermitdave/FrequencyWords/master/content/2016/pl/pl_50k.txt'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esep\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e' '\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enames\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Word'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Frequency'\u003c/span\u003e]\n).\u003cspan class=\"pl-c1\"\u003eset_index\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'Word'\u003c/span\u003e)[\u003cspan 
class=\"pl-s\"\u003e'Frequency'\u003c/span\u003e]\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNote the composition of the Series\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt; \u0026gt; polish_word_frequencies\nWord\nnie\n5875385\nto\n4388099\nsię\n3507076\nw\n2723767\nna\n2309765\nName: Frequency, dtype: int64\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epolish_word_frequencies\u003c/span\u003e\n\u003cspan class=\"pl-v\"\u003eWord\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003enie\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e5875385\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eto\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e4388099\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003esię\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e3507076\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ew\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e2723767\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ena\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e2309765\u003c/span\u003e\n\u003cspan class=\"pl-v\"\u003eName\u003c/span\u003e: \u003cspan class=\"pl-smi\"\u003eFrequency\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003edtype\u003c/span\u003e: \u003cspan class=\"pl-smi\"\u003eint64\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we build a DataFrame, \u003ccode\u003ereview_df\u003c/code\u003e, consisting of documents which appear (to a non-Polish speaker) to be\npositive and negative hotel reviews from the \u003ca href=\"https://klejbenchmark.com/tasks/\"\u003ePolEmo2.0\u003c/a\u003e corpus\n(Kocoń, et al. 2019). Note this data is under a CC BY-NC-SA 4.0 license. 
These are labeled as\n\"__label__meta_plus_m\" and \"__label__meta_minus_m\". We will use Scattertext to compare those\nreviews and determine\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"nlp = spacy.blank('pl')\nnlp.add_pipe('sentencizer')\n\nwith ZipFile(io.BytesIO(urlopen(\n 'https://klejbenchmark.com/static/data/klej_polemo2.0-in.zip'\n).read())) as zf:\n review_df = pd.read_csv(zf.open('train.tsv'), sep='\\t')[\n lambda df: df.target.isin(['__label__meta_plus_m', '__label__meta_minus_m'])\n ].assign(\n Parse=lambda df: df.sentence.apply(nlp)\n )\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espacy\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eblank\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'pl'\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eadd_pipe\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'sentencizer'\u003c/span\u003e)\n\n\u003cspan class=\"pl-k\"\u003ewith\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eZipFile\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eio\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eBytesIO\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eurlopen\u003c/span\u003e(\n \u003cspan class=\"pl-s\"\u003e'https://klejbenchmark.com/static/data/klej_polemo2.0-in.zip'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eread\u003c/span\u003e())) \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ezf\u003c/span\u003e:\n \u003cspan class=\"pl-s1\"\u003ereview_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eread_csv\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ezf\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'train.tsv'\u003c/span\u003e), \u003cspan class=\"pl-s1\"\u003esep\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'\u003cspan class=\"pl-cce\"\u003e\\t\u003c/span\u003e'\u003c/span\u003e)[\n \u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etarget\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eisin\u003c/span\u003e([\u003cspan class=\"pl-s\"\u003e'__label__meta_plus_m'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'__label__meta_minus_m'\u003c/span\u003e])\n ].\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-v\"\u003eParse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esentence\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e)\n )\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we wish to create a \u003ccode\u003eParsedCorpus\u003c/code\u003e object from \u003ccode\u003ereview_df\u003c/code\u003e. In preparation, we first assemble a\nlist of Polish stopwords from the \u003ca href=\"https://github.com/bieli/stopwords/\"\u003estopwords\u003c/a\u003e repository. 
We also\ncreate the \u003ccode\u003enot_a_word\u003c/code\u003e regular expression to filter out terms which do not contain a letter.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"polish_stopwords = {\n stopword for stopword in\n urlopen(\n 'https://raw.githubusercontent.com/bieli/stopwords/master/polish.stopwords.txt'\n ).read().decode('utf-8').split('\\n')\n if stopword.strip()\n}\n\nnot_a_word = re.compile(r'^\\W+$')\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003epolish_stopwords\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e {\n \u003cspan class=\"pl-s1\"\u003estopword\u003c/span\u003e \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003estopword\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e\n \u003cspan class=\"pl-en\"\u003eurlopen\u003c/span\u003e(\n \u003cspan class=\"pl-s\"\u003e'https://raw.githubusercontent.com/bieli/stopwords/master/polish.stopwords.txt'\u003c/span\u003e\n ).\u003cspan class=\"pl-c1\"\u003eread\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003edecode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003esplit\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'\u003cspan class=\"pl-cce\"\u003e\\n\u003c/span\u003e'\u003c/span\u003e)\n \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003estopword\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003estrip\u003c/span\u003e()\n}\n\n\u003cspan class=\"pl-s1\"\u003enot_a_word\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ere\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecompile\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003er'^\\W+$'\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWith these present, we can build a corpus from 
\u003ccode\u003ereview_df\u003c/code\u003e with the category being the binary\n\"target\" column. We reduce the term space to unigrams and then run the \u003ccode\u003efilter_out\u003c/code\u003e method, which\ntakes a function to determine if a term should be removed from the corpus. The function identifies\nterms which are in the Polish stoplist or do not contain a letter. Finally, terms occurring\nfewer than 20 times in the corpus are removed.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe set the background frequency Series we created earlier as the background corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"corpus = st.CorpusFromParsedDocuments(\n review_df,\n category_col='target',\n parsed_col='Parse'\n).build(\n).get_unigram_corpus(\n).filter_out(\n lambda term: term in polish_stopwords or not_a_word.match(term) is not None\n).remove_infrequent_words(\n minimum_term_count=20\n).set_background_corpus(\n polish_word_frequencies\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ereview_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'target'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Parse'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e(\n).\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e(\n).\u003cspan class=\"pl-c1\"\u003efilter_out\u003c/span\u003e(\n \u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003eterm\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003eterm\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epolish_stopwords\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003eor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enot_a_word\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ematch\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e\u003cspan class=\"pl-c1\"\u003eis\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003enot\u003c/span\u003e\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003eNone\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eremove_infrequent_words\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003eminimum_term_count\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e20\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eset_background_corpus\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003epolish_word_frequencies\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNote that a minimum word count of 20 was chosen to ensure that only around 2,000 terms would be displayed.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt; \u0026gt; corpus.get_num_terms()\n2023\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_num_terms\u003c/span\u003e()\n\u003cspan class=\"pl-c1\"\u003e2023\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eRunning \u003ccode\u003eget_term_and_background_counts\u003c/code\u003e shows us total term counts in the corpus compared to background\nfrequency counts. 
We limit this to terms which only occur in the corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt; \u0026gt; corpus.get_term_and_background_counts()[\n ...\nlambda df: df.corpus \u0026gt; 0\n...].sort_values(by='corpus', ascending=False)\n\nbackground\ncorpus\nm\n341583838.0\n4819.0\nhotelu\n33108.0\n1812.0\nhotel\n297974790.0\n1651.0\ndoktor\n154840.0\n1534.0\npolecam\n0.0\n1438.0\n.........\nszoku\n0.0\n21.0\nbadaniem\n0.0\n21.0\nbalkonu\n0.0\n21.0\nstopnia\n0.0\n21.0\nwobec\n0.0\n21.0\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c1\"\u003e\u0026gt;\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_term_and_background_counts\u003c/span\u003e()[\n ...\n\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e\n...].\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'corpus'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eascending\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003ebackground\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003em\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e341583838.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e4819.0\u003c/span\u003e\n\u003cspan 
class=\"pl-s1\"\u003ehotelu\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e33108.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e1812.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehotel\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e297974790.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e1651.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003edoktor\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e154840.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e1534.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003epolecam\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e1438.0\u003c/span\u003e\n.........\n\u003cspan class=\"pl-c1\"\u003eszoku\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e21.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ebadaniem\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e21.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ebalkonu\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e21.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003estopnia\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e21.0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ewobec\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e0.0\u003c/span\u003e\n\u003cspan class=\"pl-c1\"\u003e21.0\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eInteresting, the term \"polecam\" appears very frequently in the corpus, but does not appear at all\nin the background corpus, making it highly characteristic. 
Judging from Google Translate, it appears to\nmean something related to \"recommend\".\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe are now ready to display the plot.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_scattertext_explorer(\n corpus,\n category='__label__meta_plus_m',\n category_name='Plus-M',\n not_category_name='Minus-M',\n minimum_term_frequency=1,\n width_in_pixels=1000,\n transform=st.Scalers.dense_rank\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'__label__meta_plus_m'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Plus-M'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Minus-M'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eminimum_term_frequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etransform\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003edense_rank\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_pos_neg_scattertext.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_pos_neg_scattertext.png\" alt=\"Polish Scattertext\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can change the formula which is used to produce the Characteristic scores\nusing the \u003ccode\u003echaracteristic_scorer\u003c/code\u003e parameter to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIt takes an instance of a descendant of the \u003ccode\u003eCharacteristicScorer\u003c/code\u003e class. See\n\u003ca href=\"https://github.com/JasonKessler/scattertext/blob/8ddff82f670aa2ed40312b2cdd077e7f0a98a873/scattertext/characteristic/DenseRankCharacteristicness.py#L36\"\u003eDenseRankCharacteristicness.py\u003c/a\u003e\nfor an example of how to make your own.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eExample of plotting with a modified characteristic scorer:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_scattertext_explorer(\n corpus,\n category='__label__meta_plus_m',\n category_name='Plus-M',\n not_category_name='Minus-M',\n minimum_term_frequency=1,\n transform=st.Scalers.dense_rank,\n characteristic_scorer=st.DenseRankCharacteristicness(),\n \tterm_ranker=st.termranking.AbsoluteFrequencyRanker,\n\tterm_scorer=st.ScaledFScorePresets(beta=1, one_to_neg_one=True)\n).encode('utf-8'))\nprint('open ' + fn)\n\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'__label__meta_plus_m'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Plus-M'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Minus-M'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eminimum_term_frequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etransform\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edense_rank\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003echaracteristic_scorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eDenseRankCharacteristicness\u003c/span\u003e(),\n \t\u003cspan class=\"pl-s1\"\u003eterm_ranker\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etermranking\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eAbsoluteFrequencyRanker\u003c/span\u003e,\n\t\u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScaledFScorePresets\u003c/span\u003e(\u003cspan 
class=\"pl-s1\"\u003ebeta\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eone_to_neg_one\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e)\n).\u003cspan class=\"pl-c1\"\u003eencode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e))\n\u003cspan class=\"pl-k\"\u003eprint\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'open '\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_dense_rank_characteristic.png\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_dense_rank_characteristic.png\" alt=\"Polish Scattertext DenseRank\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that numbers show up as more characteristic using the Dense Rank Difference. 
It may be that they occur\nunusually frequently in this corpus, or perhaps the background word frequencies undercounted numbers.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003ePlotting word productivity\u003c/h3\u003e\u003ca id=\"user-content-plotting-word-productivity\" class=\"anchor\" aria-label=\"Permalink: Plotting word productivity\" href=\"#plotting-word-productivity\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWord productivity is one strategy for plotting word-based charts describing an uncategorized corpus.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eProductivity is defined in Schumann (2016) (Jason: check this) as the entropy of n-grams\nwhich contain a term. 
For the entropy computation, the probability of an n-gram with respect to the term whose productivity is\nbeing calculated is the frequency of the n-gram divided by the term's frequency.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSince productivity highly correlates with frequency, the recommended metric to plot is the dense rank difference between\nfrequency and productivity.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe snippet below plots words in the convention corpus based on their log frequency and their productivity.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe function \u003ccode\u003est.whole_corpus_productivity_scores\u003c/code\u003e returns a DataFrame giving each word's productivity. For example,\nin the convention corpus,\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eProductivity scores should be calculated on a \u003ccode\u003eCorpus\u003c/code\u003e-like object which contains a complete set of unigrams and at\nleast bigrams. This corpus should not be compacted before the productivity score calculation.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe terms with lower productivity have more limited usage (e.g., \"thank\" for \"thank you\", \"united\"\nfor \"united states\") while the terms with higher productivity occur in a wider variety of contexts (\"getting\",\n\"actually\", \"political\", etc.).\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import spacy\nimport scattertext as st\n\ncorpus_no_cat = st.CorpusWithoutCategoriesFromParsedDocuments(\n st.SampleCorpora.ConventionData2012.get_data().assign(\n Parse=lambda df: [x for x in spacy.load('en_core_web_sm').pipe(df.text)]),\n parsed_col='Parse'\n).build()\n\ncompact_corpus_no_cat = corpus_no_cat.get_stoplisted_unigram_corpus().remove_infrequent_words(9)\n\nplot_df = st.whole_corpus_productivity_scores(corpus_no_cat).assign(\n RankDelta=lambda df: st.RankDifference().get_scores(\n a=df.Productivity,\n 
b=df.Frequency\n )\n).reindex(\n compact_corpus_no_cat.get_terms()\n).dropna().assign(\n X=lambda df: df.Frequency,\n Xpos=lambda df: st.Scalers.log_scale(df.Frequency),\n Y=lambda df: df.RankDelta,\n Ypos=lambda df: st.Scalers.scale(df.RankDelta),\n)\n\nhtml = st.dataframe_scattertext(\n compact_corpus_no_cat.whitelist_terms(plot_df.index),\n plot_df=plot_df,\n metadata=lambda df: df.get_df()['speaker'],\n ignore_categories=True,\n x_label='Rank Frequency',\n y_label=\u0026quot;Productivity\u0026quot;,\n left_list_column='Ypos',\n color_score_column='Ypos',\n y_axis_labels=['Least Productive', 'Average Productivity', 'Most Productive'],\n header_names={'upper': 'Most Productive', 'lower': 'Least Productive', 'right': 'Characteristic'},\n horizontal_line_y_position=0\n)\n\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espacy\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003ecorpus_no_cat\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusWithoutCategoriesFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-v\"\u003eParse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: [\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e \u003cspan 
class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espacy\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eload\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'en_core_web_sm'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003epipe\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e)]),\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Parse'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n\n\u003cspan class=\"pl-s1\"\u003ecompact_corpus_no_cat\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus_no_cat\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_stoplisted_unigram_corpus\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eremove_infrequent_words\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e9\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhole_corpus_productivity_scores\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus_no_cat\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-v\"\u003eRankDelta\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDifference\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_scores\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ea\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eProductivity\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eb\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFrequency\u003c/span\u003e\n )\n).\u003cspan class=\"pl-c1\"\u003ereindex\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecompact_corpus_no_cat\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()\n).\u003cspan class=\"pl-c1\"\u003edropna\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFrequency\u003c/span\u003e,\n \u003cspan class=\"pl-v\"\u003eXpos\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003elog_scale\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFrequency\u003c/span\u003e),\n \u003cspan class=\"pl-c1\"\u003eY\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDelta\u003c/span\u003e,\n \u003cspan class=\"pl-v\"\u003eYpos\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003escale\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDelta\u003c/span\u003e),\n)\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edataframe_scattertext\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecompact_corpus_no_cat\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitelist_terms\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eindex\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eplot_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eignore_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Rank Frequency'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"Productivity\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eleft_list_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-s\"\u003e'Ypos'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecolor_score_column\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Ypos'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_axis_labels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Least Productive'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Average Productivity'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Most Productive'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eheader_names\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e{\u003cspan class=\"pl-s\"\u003e'upper'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Most Productive'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'lower'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Least Productive'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'right'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Characteristic'\u003c/span\u003e},\n \u003cspan class=\"pl-s1\"\u003ehorizontal_line_y_position\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/convention_single_category_productivity.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/convention_single_category_productivity.png\" alt=\"Productivity\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUnderstanding Scaled F-Score\u003c/h3\u003e\u003ca id=\"user-content-understanding-scaled-f-score\" class=\"anchor\" aria-label=\"Permalink: Understanding Scaled F-Score\" 
href=\"#understanding-scaled-f-score\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eLet's now turn our attention to a novel term scoring metric, Scaled F-Score. We'll examine this on a unigram\nversion of the Rotten Tomatoes corpus (Pang et al. 2002). It contains excerpts of\npositive and negative movie reviews.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003ePlease\nsee \u003ca href=\"http://nbviewer.jupyter.org/github/JasonKessler/GlobalAI2018/blob/master/notebook/Scaled-F-Score-Explanation.ipynb\" rel=\"nofollow\"\u003eScaled F Score Explanation\u003c/a\u003e\nfor a notebook version of this analysis.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs1.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs1.png\" alt=\"Scaled F-Score Explanation 1\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"from scipy.stats import hmean\n\nterm_freq_df = 
corpus.get_unigram_corpus().get_term_freq_df()[['Positive freq', 'Negative freq']]\nterm_freq_df = term_freq_df[term_freq_df.sum(axis=1) \u0026gt; 0]\n\nterm_freq_df['pos_precision'] = (term_freq_df['Positive freq'] * 1. /\n (term_freq_df['Positive freq'] + term_freq_df['Negative freq']))\n\nterm_freq_df['pos_freq_pct'] = (term_freq_df['Positive freq'] * 1.\n / term_freq_df['Positive freq'].sum())\n\nterm_freq_df['pos_hmean'] = (term_freq_df\n .apply(lambda x: (hmean([x['pos_precision'], x['pos_freq_pct']])\n if x['pos_precision'] \u0026gt; 0 and x['pos_freq_pct'] \u0026gt; 0\n else 0), axis=1))\nterm_freq_df.sort_values(by='pos_hmean', ascending=False).iloc[:10]\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escipy\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003estats\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ehmean\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_term_freq_df\u003c/span\u003e()[[\u003cspan class=\"pl-s\"\u003e'Positive freq'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'Negative freq'\u003c/span\u003e]]\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esum\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eaxis\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e]\n\n\u003cspan 
class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_precision'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Positive freq'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1.\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e\n (\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Positive freq'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative freq'\u003c/span\u003e]))\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_freq_pct'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Positive freq'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1.\u003c/span\u003e\n \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Positive freq'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003esum\u003c/span\u003e())\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_hmean'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e\n .\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e: (\u003cspan class=\"pl-en\"\u003ehmean\u003c/span\u003e([\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_precision'\u003c/span\u003e], \u003cspan 
class=\"pl-s1\"\u003ex\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_freq_pct'\u003c/span\u003e]])\n \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_precision'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003eand\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_freq_pct'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e\n \u003cspan class=\"pl-k\"\u003eelse\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e), \u003cspan class=\"pl-s1\"\u003eaxis\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e))\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'pos_hmean'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eascending\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[:\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e]\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs2.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs2.png\" alt=\"SFS2\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIf we plot term frequency on the x-axis and the 
percentage of a term's occurrences\nwhich are in positive documents (i.e., its precision) on the y-axis, we can see\nthat low-frequency terms have a much higher variation in the precision. Given these terms have\nlow frequencies, the harmonic means are low. Thus, the only terms which have a high harmonic mean\nare extremely frequent words which tend to all have near average precisions.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"freq = term_freq_df.pos_freq_pct.values\nprec = term_freq_df.pos_precision.values\nhtml = st.produce_scattertext_explorer(\n corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)),\n category='Positive',\n not_category_name='Negative',\n not_categories=['Negative'],\n\n x_label='Portion of words used in positive reviews',\n original_x=freq,\n x_coords=(freq - freq.min()) / freq.max(),\n x_axis_values=[int(freq.min() * 1000) / 1000.,\n int(freq.max() * 1000) / 1000.],\n\n y_label='Portion of documents containing word that are positive',\n original_y=prec,\n y_coords=(prec - prec.min()) / prec.max(),\n y_axis_values=[int(prec.min() * 1000) / 1000.,\n int((prec.max() / 2.) 
* 1000) / 1000.,\n int(prec.max() * 1000) / 1000.],\n scores=term_freq_df.pos_hmean.values,\n\n sort_by_dist=False,\n show_characteristic=False\n)\nfile_name = 'not_normed_freq_prec.html'\nopen(file_name, 'wb').write(html.encode('utf-8'))\nIFrame(src=file_name, width=1300, height=700)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_freq_pct\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_precision\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eremove_terms\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eindex\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003enot_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Portion of words used in positive reviews'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eoriginal_x\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003ex_axis_values\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e,\n \u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Portion of documents containing word that are positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eoriginal_y\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003ey_axis_values\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e,\n \u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e((\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e2.\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e,\n \u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_hmean\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e,\n\n \u003cspan class=\"pl-s1\"\u003esort_by_dist\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eshow_characteristic\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e\n)\n\u003cspan class=\"pl-s1\"\u003efile_name\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'not_normed_freq_prec.html'\u003c/span\u003e\n\u003cspan class=\"pl-en\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efile_name\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'wb'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ewrite\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eencode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e))\n\u003cspan class=\"pl-en\"\u003eIFrame\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esrc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003efile_name\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ewidth\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1300\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eheight\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e700\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" 
rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs3.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs3.png\" alt=\"SFS3\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs4.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs4.png\" alt=\"SFS4\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"from scipy.stats import norm\n\n\ndef normcdf(x):\n return norm.cdf(x, x.mean(), x.std())\n\n\nterm_freq_df['pos_precision_normcdf'] = normcdf(term_freq_df.pos_precision)\n\nterm_freq_df['pos_freq_pct_normcdf'] = normcdf(term_freq_df.pos_freq_pct.values)\n\nterm_freq_df['pos_scaled_f_score'] = hmean(\n [term_freq_df['pos_precision_normcdf'], term_freq_df['pos_freq_pct_normcdf']])\n\nterm_freq_df.sort_values(by='pos_scaled_f_score', ascending=False).iloc[:10]\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escipy\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003estats\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enorm\u003c/span\u003e\n\n\n\u003cspan class=\"pl-k\"\u003edef\u003c/span\u003e \u003cspan class=\"pl-en\"\u003enormcdf\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e):\n \u003cspan class=\"pl-k\"\u003ereturn\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enorm\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecdf\u003c/span\u003e(\u003cspan 
class=\"pl-s1\"\u003ex\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emean\u003c/span\u003e(), \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003estd\u003c/span\u003e())\n\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_precision_normcdf'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003enormcdf\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_precision\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_freq_pct_normcdf'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003enormcdf\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_freq_pct\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003ehmean\u003c/span\u003e(\n [\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_precision_normcdf'\u003c/span\u003e], \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_freq_pct_normcdf'\u003c/span\u003e]])\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'pos_scaled_f_score'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eascending\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003eFalse\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[:\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e]\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs5.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs5.png\" alt=\"SFS5\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"freq = term_freq_df.pos_freq_pct_normcdf.values\nprec = term_freq_df.pos_precision_normcdf.values\nhtml = st.produce_scattertext_explorer(\n corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)),\n category='Positive',\n not_category_name='Negative',\n not_categories=['Negative'],\n\n x_label='Portion of words used in positive reviews (norm-cdf)',\n original_x=freq,\n x_coords=(freq - freq.min()) / freq.max(),\n x_axis_values=[int(freq.min() * 1000) / 1000.,\n int(freq.max() * 1000) / 1000.],\n\n y_label='documents containing word that are positive (norm-cdf)',\n original_y=prec,\n y_coords=(prec - prec.min()) / prec.max(),\n y_axis_values=[int(prec.min() * 1000) / 1000.,\n int((prec.max() / 2.) 
* 1000) / 1000.,\n int(prec.max() * 1000) / 1000.],\n scores=term_freq_df.pos_scaled_f_score.values,\n\n sort_by_dist=False,\n show_characteristic=False\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_freq_pct_normcdf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_precision_normcdf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eremove_terms\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eindex\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan 
class=\"pl-s\"\u003e'Negative'\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Portion of words used in positive reviews (norm-cdf)'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eoriginal_x\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003ex_axis_values\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e,\n \u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'documents containing word that are positive 
(norm-cdf)'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eoriginal_y\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003ey_axis_values\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e,\n \u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e((\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e2.\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000.\u003c/span\u003e,\n \u003cspan class=\"pl-en\"\u003eint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e1000.\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_scaled_f_score\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e,\n\n \u003cspan class=\"pl-s1\"\u003esort_by_dist\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eshow_characteristic\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs6.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs6.png\" alt=\"SFS6\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs7.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs7.png\" alt=\"SFS7\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"term_freq_df['neg_precision_normcdf'] = normcdf((term_freq_df['Negative freq'] * 1. 
/\n (term_freq_df['Negative freq'] + term_freq_df['Positive freq'])))\n\nterm_freq_df['neg_freq_pct_normcdf'] = normcdf((term_freq_df['Negative freq'] * 1.\n / term_freq_df['Negative freq'].sum()))\n\nterm_freq_df['neg_scaled_f_score'] = hmean(\n [term_freq_df['neg_precision_normcdf'], term_freq_df['neg_freq_pct_normcdf']])\n\nterm_freq_df['scaled_f_score'] = 0\nterm_freq_df.loc[term_freq_df['pos_scaled_f_score'] \u0026gt; term_freq_df['neg_scaled_f_score'],\n 'scaled_f_score'] = term_freq_df['pos_scaled_f_score']\nterm_freq_df.loc[term_freq_df['pos_scaled_f_score'] \u0026lt; term_freq_df['neg_scaled_f_score'],\n 'scaled_f_score'] = 1 - term_freq_df['neg_scaled_f_score']\nterm_freq_df['scaled_f_score'] = 2 * (term_freq_df['scaled_f_score'] - 0.5)\nterm_freq_df.sort_values(by='scaled_f_score', ascending=True).iloc[:10]\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_precision_normcdf'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003enormcdf\u003c/span\u003e((\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative freq'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1.\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e\n (\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative freq'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Positive freq'\u003c/span\u003e])))\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_freq_pct_normcdf'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003enormcdf\u003c/span\u003e((\u003cspan 
class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative freq'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1.\u003c/span\u003e\n \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative freq'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003esum\u003c/span\u003e()))\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003ehmean\u003c/span\u003e(\n [\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_precision_normcdf'\u003c/span\u003e], \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_freq_pct_normcdf'\u003c/span\u003e]])\n\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eloc\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_scaled_f_score'\u003c/span\u003e],\n \u003cspan class=\"pl-s\"\u003e'scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_scaled_f_score'\u003c/span\u003e]\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eloc\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'pos_scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e\u0026lt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_scaled_f_score'\u003c/span\u003e],\n \u003cspan class=\"pl-s\"\u003e'scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'neg_scaled_f_score'\u003c/span\u003e]\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'scaled_f_score'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.5\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esort_values\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eby\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'scaled_f_score'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eascending\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[:\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e]\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" 
href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs8.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs8.png\" alt=\"SFS8\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"is_pos = term_freq_df.pos_scaled_f_score \u0026gt; term_freq_df.neg_scaled_f_score\nfreq = term_freq_df.pos_freq_pct_normcdf * is_pos - term_freq_df.neg_freq_pct_normcdf * ~is_pos\nprec = term_freq_df.pos_precision_normcdf * is_pos - term_freq_df.neg_precision_normcdf * ~is_pos\n\n\ndef scale(ar):\n return (ar - ar.min()) / (ar.max() - ar.min())\n\n\ndef close_gap(ar):\n ar[ar \u0026gt; 0] -= ar[ar \u0026gt; 0].min()\n ar[ar \u0026lt; 0] -= ar[ar \u0026lt; 0].max()\n return ar\n\n\nhtml = st.produce_scattertext_explorer(\n corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)),\n category='Positive',\n not_category_name='Negative',\n not_categories=['Negative'],\n\n x_label='Frequency',\n original_x=freq,\n x_coords=scale(close_gap(freq)),\n x_axis_labels=['Frequent in Neg',\n 'Not Frequent',\n 'Frequent in Pos'],\n\n y_label='Precision',\n original_y=prec,\n y_coords=scale(close_gap(prec)),\n y_axis_labels=['Neg Precise',\n 'Imprecise',\n 'Pos Precise'],\n\n scores=(term_freq_df.scaled_f_score.values + 1) / 2,\n sort_by_dist=False,\n show_characteristic=False\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eis_pos\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_scaled_f_score\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eneg_scaled_f_score\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_freq_pct_normcdf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eis_pos\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eneg_freq_pct_normcdf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e~\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eis_pos\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epos_precision_normcdf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eis_pos\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eneg_precision_normcdf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e*\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e~\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eis_pos\u003c/span\u003e\n\n\n\u003cspan class=\"pl-k\"\u003edef\u003c/span\u003e \u003cspan class=\"pl-en\"\u003escale\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e):\n \u003cspan class=\"pl-k\"\u003ereturn\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003emax\u003c/span\u003e() \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e())\n\n\n\u003cspan class=\"pl-k\"\u003edef\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eclose_gap\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e):\n \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e-=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()\n \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026lt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e-=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026lt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e()\n \u003cspan class=\"pl-k\"\u003ereturn\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ear\u003c/span\u003e\n\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eremove_terms\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan 
class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eindex\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Frequency'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eoriginal_x\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-en\"\u003escale\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eclose_gap\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efreq\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ex_axis_labels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Frequent in Neg'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'Not Frequent'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'Frequent in Pos'\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Precision'\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003eoriginal_y\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-en\"\u003escale\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eclose_gap\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eprec\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ey_axis_labels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Neg Precise'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'Imprecise'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'Pos Precise'\u003c/span\u003e],\n\n \u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003escaled_f_score\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esort_by_dist\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eshow_characteristic\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs9.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs9.png\" alt=\"SFS9\" style=\"max-width: 
100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can use \u003ccode\u003est.ScaledFScorePresets\u003c/code\u003e as a term scorer to display terms' Scaled F-Score on the y-axis and\nterm frequencies on the x-axis.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_frequency_explorer(\n corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)),\n category='Positive',\n not_category_name='Negative',\n not_categories=['Negative'],\n term_scorer=st.ScaledFScorePresets(beta=1, one_to_neg_one=True),\n metadata=rdf['movie_name'],\n grey_threshold=0\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_frequency_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eremove_terms\u003c/span\u003e(\u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eset\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_freq_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eindex\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan 
class=\"pl-s\"\u003e'Negative'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eScaledFScorePresets\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ebeta\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eone_to_neg_one\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003erdf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'movie_name'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003egrey_threshold\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs10.png\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs10.png\" alt=\"SFS10\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAlternative term scoring methods\u003c/h3\u003e\u003ca id=\"user-content-alternative-term-scoring-methods\" class=\"anchor\" aria-label=\"Permalink: Alternative term scoring methods\" href=\"#alternative-term-scoring-methods\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 
2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eScaled F-Score is not the only scoring method included in Scattertext. Please click on one of the links below to\nview a notebook which describes how other class association scores work and can be visualized through Scattertext.\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ca href=\"https://colab.research.google.com/drive/1snxAP8X6EIDi42FugJ_h5U-fBGDCqtyS\" rel=\"nofollow\"\u003eGoogle Colab Notebook\u003c/a\u003e (recommended).\u003c/li\u003e\n\u003cli\u003e\u003ca href=\"https://colab.research.google.com/drive/1snxAP8X6EIDi42FugJ_h5U-fBGDCqtyS\" rel=\"nofollow\"\u003eJupyter Notebook via NBViewer\u003c/a\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cp dir=\"auto\"\u003eNew in 0.0.2.73 is the delta JS-Divergence scorer \u003ccode\u003eDeltaJSDivergence\u003c/code\u003e (Gallagher et al. 2020), and its\ncorresponding compactor (JSDCompactor). 
See \u003ccode\u003edemo_deltajsd.py\u003c/code\u003e for an example usage.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eThe position-select-plot process\u003c/h3\u003e\u003ca id=\"user-content-the-position-select-plot-process\" class=\"anchor\" aria-label=\"Permalink: The position-select-plot process\" href=\"#the-position-select-plot-process\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNew in 0.0.2.72\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eScattertext was originally set up to visualize corpora objects, which are connected sets of documents and\nterms to visualize. The \"compaction\" process allows users to eliminate terms which may not be associated with a\ncategory using a variety of feature selection methods. 
The issue with this is that the terms eliminated during\nthe selection process are not taken into account when scaling term positions.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThis issue can be mitigated by using the position-select-plot process, where term positions are pre-determined\nbefore the selection process is made.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eLet's first use the 2012 conventions corpus, update the category names, and create a unigram corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\nimport numpy as np\n\ndf = st.SampleCorpora.ConventionData2012.get_data().assign(\n parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n).assign(party=lambda df: df['party'].apply({'democrat': 'Democratic', 'republican': 'Republican'}.get))\n\ncorpus = st.CorpusFromParsedDocuments(\n df, category_col='party', parsed_col='parse'\n).build().get_unigram_corpus()\n\ncategory_name = 'Democratic'\nnot_category_name = 'Republican'\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enumpy\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enp\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\n \u003cspan 
class=\"pl-s1\"\u003eparse\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n).\u003cspan class=\"pl-c1\"\u003eassign\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eparty\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e({\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'republican'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e}.\u003cspan class=\"pl-c1\"\u003eget\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan 
class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e()\n\n\u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, let's create a dataframe consisting of the original counts and their log-scale positions.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"def get_log_scale_df(corpus, y_category, x_category):\n term_coord_df = corpus.get_term_freq_df('')\n\n # Log scale term counts (with a smoothing constant) as the initial coordinates\n coord_columns = []\n for category in [y_category, x_category]:\n col_name = category + '_coord'\n term_coord_df[col_name] = np.log(term_coord_df[category] + 1e-6) / np.log(2)\n coord_columns.append(col_name)\n\n # Scale these coordinates to between 0 and 1\n min_offset = term_coord_df[coord_columns].min(axis=0).min()\n for coord_column in coord_columns:\n term_coord_df[coord_column] -= min_offset\n max_offset = term_coord_df[coord_columns].max(axis=0).max()\n for coord_column in coord_columns:\n term_coord_df[coord_column] /= max_offset\n return term_coord_df\n\n\n# Get term coordinates from original corpus\nterm_coordinates = get_log_scale_df(corpus, category_name, not_category_name)\nprint(term_coordinates)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003edef\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eget_log_scale_df\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ey_category\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ex_category\u003c/span\u003e):\n \u003cspan 
class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_term_freq_df\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e''\u003c/span\u003e)\n\n \u003cspan class=\"pl-c\"\u003e# Log scale term counts (with a smoothing constant) as the initial coordinates\u003c/span\u003e\n \u003cspan class=\"pl-s1\"\u003ecoord_columns\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e []\n \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e [\u003cspan class=\"pl-s1\"\u003ey_category\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ex_category\u003c/span\u003e]:\n \u003cspan class=\"pl-s1\"\u003ecol_name\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'_coord'\u003c/span\u003e\n \u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecol_name\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enp\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003elog\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e1e-6\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enp\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003elog\u003c/span\u003e(\u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e)\n \u003cspan class=\"pl-s1\"\u003ecoord_columns\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eappend\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecol_name\u003c/span\u003e)\n\n 
\u003cspan class=\"pl-c\"\u003e# Scale these coordinates to between 0 and 1\u003c/span\u003e\n \u003cspan class=\"pl-s1\"\u003emin_offset\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecoord_columns\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eaxis\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003emin\u003c/span\u003e()\n \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecoord_column\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecoord_columns\u003c/span\u003e:\n \u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecoord_column\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e-=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003emin_offset\u003c/span\u003e\n \u003cspan class=\"pl-s1\"\u003emax_offset\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecoord_columns\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eaxis\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003emax\u003c/span\u003e()\n \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecoord_column\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecoord_columns\u003c/span\u003e:\n \u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecoord_column\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e/=\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003emax_offset\u003c/span\u003e\n \u003cspan class=\"pl-k\"\u003ereturn\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eterm_coord_df\u003c/span\u003e\n\n\n\u003cspan class=\"pl-c\"\u003e# Get term coordinates from original corpus\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eget_log_scale_df\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e)\n\u003cspan class=\"pl-en\"\u003eprint\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eHere is a preview of the \u003ccode\u003eterm_coordinates\u003c/code\u003e dataframe. The \u003ccode\u003eDemocratic\u003c/code\u003e and\n\u003ccode\u003eRepublican\u003c/code\u003e columns contain the term counts, while the \u003ccode\u003e_coord\u003c/code\u003e columns\ncontain their logged coordinates. Visualizing 7,973 terms is difficult (but\npossible) for people running Scattertext on most computers.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\" Democratic Republican Democratic_coord Republican_coord\nterm\nthank 158 205 0.860166 0.872032\nyou 836 794 0.936078 0.933729\nso 337 212 0.894681 0.873562\nmuch 84 76 0.831380 0.826820\nvery 62 75 0.817543 0.826216\n... ... ... ... 
...\nprecinct 0 2 0.000000 0.661076\ngodspeed 0 1 0.000000 0.629493\nbeauty 0 1 0.000000 0.629493\nbumper 0 1 0.000000 0.629493\nsticker 0 1 0.000000 0.629493\n\n[7973 rows x 4 columns]\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e Democratic Republican Democratic_coord Republican_coord\nterm\nthank 158 205 0.860166 0.872032\nyou 836 794 0.936078 0.933729\nso 337 212 0.894681 0.873562\nmuch 84 76 0.831380 0.826820\nvery 62 75 0.817543 0.826216\n... ... ... ... ...\nprecinct 0 2 0.000000 0.661076\ngodspeed 0 1 0.000000 0.629493\nbeauty 0 1 0.000000 0.629493\nbumper 0 1 0.000000 0.629493\nsticker 0 1 0.000000 0.629493\n\n[7973 rows x 4 columns]\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe can visualize this full data set by running the following code block. We'll create a custom\nJavascript function to populate the tooltip with the original term counts, and create a\nScattertext Explorer where the x and y coordinates and original values are specified from the data\nframe. Additionally, we can use \u003ccode\u003eshow_diagonal=True\u003c/code\u003e to draw a dashed diagonal line across the plot area.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eYou can click the chart below to see the interactive version. Note that it will take a while to load.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"# The tooltip JS function. 
Note that d is the term data object, and ox and oy are the original x- and y-\n# axis counts.\nget_tooltip_content = ('(function(d) {return d.term + \u0026quot;\u0026lt;br/\u0026gt;' + not_category_name + ' Count: \u0026quot; ' +\n '+ d.ox +\u0026quot;\u0026lt;br/\u0026gt;' + category_name + ' Count: \u0026quot; + d.oy})')\n\n\nhtml_orig = st.produce_scattertext_explorer(\n corpus,\n category=category_name,\n not_category_name=not_category_name,\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n metadata=corpus.get_df()['speaker'],\n show_diagonal=True,\n original_y=term_coordinates[category_name],\n original_x=term_coordinates[not_category_name],\n x_coords=term_coordinates[category_name + '_coord'],\n y_coords=term_coordinates[not_category_name + '_coord'],\n max_overlapping=3,\n use_global_scale=True,\n get_tooltip_content=get_tooltip_content,\n)\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e# The tooltip JS function. Note that d is the term data object, and ox and oy are the original x- and y-\n# axis counts.\nget_tooltip_content = ('(function(d) {return d.term + \"\u0026lt;br/\u0026gt;' + not_category_name + ' Count: \" ' +\n '+ d.ox +\"\u0026lt;br/\u0026gt;' + category_name + ' Count: \" + d.oy})')\n\n\nhtml_orig = st.produce_scattertext_explorer(\n corpus,\n category=category_name,\n not_category_name=not_category_name,\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n metadata=corpus.get_df()['speaker'],\n show_diagonal=True,\n original_y=term_coordinates[category_name],\n original_x=term_coordinates[not_category_name],\n x_coords=term_coordinates[category_name + '_coord'],\n y_coords=term_coordinates[not_category_name + '_coord'],\n max_overlapping=3,\n use_global_scale=True,\n get_tooltip_content=get_tooltip_content,\n)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca 
href=\"https://jasonkessler.github.io/demo_global_scale_log_orig.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_global_scale_log_orig.png\" alt=\"demo_global_scale_log_orig.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNext, we can visualize the compacted version of the corpus. The compaction, using \u003ccode\u003eClassPercentageCompactor\u003c/code\u003e,\nselects terms which appear frequently in each category. The \u003ccode\u003eterm_count\u003c/code\u003e parameter, set to 2, is used to determine\nthe percentage threshold for terms to keep in a particular category. This is done by calculating the\npercentile of terms (types) in each category which appear more than two times. We find the smallest percentile,\nand only include terms which occur above that percentile in a given category.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that this compaction leaves only 2,828 terms. 
This number is much easier for Scattertext to display\nin a browser.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"# Select terms which appear a minimum threshold in both corpora\ncompact_corpus = corpus.compact(st.ClassPercentageCompactor(term_count=2))\n\n# Only take term coordinates of terms remaining in corpus\nterm_coordinates = term_coordinates.loc[compact_corpus.get_terms()]\n\nhtml_compact = st.produce_scattertext_explorer(\n compact_corpus,\n category=category_name,\n not_category_name=not_category_name,\n minimum_term_frequency=0,\n pmi_threshold_coefficient=0,\n width_in_pixels=1000,\n metadata=corpus.get_df()['speaker'],\n show_diagonal=True,\n original_y=term_coordinates[category_name],\n original_x=term_coordinates[not_category_name],\n x_coords=term_coordinates[category_name + '_coord'],\n y_coords=term_coordinates[not_category_name + '_coord'],\n max_overlapping=3,\n use_global_scale=True,\n get_tooltip_content=get_tooltip_content,\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-c\"\u003e# Select terms which appear a minimum threshold in both corpora\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ecompact_corpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecompact\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eClassPercentageCompactor\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_count\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e))\n\n\u003cspan class=\"pl-c\"\u003e# Only take term coordinates of terms remaining in corpus\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eloc\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecompact_corpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_terms\u003c/span\u003e()]\n\n\u003cspan class=\"pl-s1\"\u003ehtml_compact\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecompact_corpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eminimum_term_frequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003epmi_threshold_coefficient\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_df\u003c/span\u003e()[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eshow_diagonal\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eoriginal_y\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eoriginal_x\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003ex_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'_coord'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003ey_coords\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_coordinates\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'_coord'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003emax_overlapping\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e3\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_global_scale\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eget_tooltip_content\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eget_tooltip_content\u003c/span\u003e,\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_global_scale_log.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_global_scale_log.png\" alt=\"demo_global_scale_log.png\" style=\"max-width: 
100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eAdvanced uses\u003c/h2\u003e\u003ca id=\"user-content-advanced-uses\" class=\"anchor\" aria-label=\"Permalink: Advanced uses\" href=\"#advanced-uses\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing differences based on only term frequencies\u003c/h3\u003e\u003ca id=\"user-content-visualizing-differences-based-on-only-term-frequencies\" class=\"anchor\" aria-label=\"Permalink: Visualizing differences based on only term frequencies\" href=\"#visualizing-differences-based-on-only-term-frequencies\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 
1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eOccasionally, only term frequency statistics are available. This may happen in the case of very large,\nlost, or proprietary data sets. \u003ccode\u003eTermCategoryFrequencies\u003c/code\u003e is a corpus representation that can accept this\nsort of data, along with any categorized documents that happen to be available.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eLet's use the \u003ca href=\"https://corpus.byu.edu/coca/\" rel=\"nofollow\"\u003eCorpus of Contemporary American English\u003c/a\u003e as an example.\u003cbr\u003e\nWe'll construct a visualization\nto analyze the difference between spoken American English and English that occurs in fiction.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"df = (pd.read_excel('https://www.wordfrequency.info/files/genres_sample.xls')\n .dropna()\n .set_index('lemma')[['SPOKEN', 'FICTION']]\n .iloc[:1000])\ndf.head()\n'''\n SPOKEN FICTION\nlemma\nthe 3859682.0 4092394.0\nI 1346545.0 1382716.0\nthey 609735.0 352405.0\nshe 212920.0 798208.0\nwould 233766.0 229865.0\n'''\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eread_excel\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'https://www.wordfrequency.info/files/genres_sample.xls'\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003edropna\u003c/span\u003e()\n .\u003cspan class=\"pl-c1\"\u003eset_index\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'lemma'\u003c/span\u003e)[[\u003cspan class=\"pl-s\"\u003e'SPOKEN'\u003c/span\u003e, 
\u003cspan class=\"pl-s\"\u003e'FICTION'\u003c/span\u003e]]\n .\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[:\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e])\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ehead\u003c/span\u003e()\n\u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e SPOKEN FICTION\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003elemma\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003ethe 3859682.0 4092394.0\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eI 1346545.0 1382716.0\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003ethey 609735.0 352405.0\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eshe 212920.0 798208.0\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003ewould 233766.0 229865.0\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTransforming this into a visualization is extremely easy. Just pass a dataframe indexed on\nterms with columns indicating category-counts into the \u003ccode\u003eTermCategoryFrequencies\u003c/code\u003e constructor.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"term_cat_freq = st.TermCategoryFrequencies(df)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eterm_cat_freq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eTermCategoryFrequencies\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAnd call \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e normally:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = 
st.produce_scattertext_explorer(\n term_cat_freq,\n category='SPOKEN',\n category_name='Spoken',\n not_category_name='Fiction',\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003eterm_cat_freq\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'SPOKEN'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Spoken'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Fiction'\u003c/span\u003e,\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_category_frequencies.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/1d763c1a79f26c2a707f3bd822d241a7a29e7a69f998a2d6fe5e69543b098ac4/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f63617465676f72795f6672657175656e636965732e706e67\" alt=\"demo_category_frequencies.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_category_frequencies.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIf you'd like to incorporate some documents into the visualization, you can add them to the\n\u003ccode\u003eTermCategoryFrequencies\u003c/code\u003e object.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, let's extract some example Fiction and Spoken documents from the sample COCA corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" 
dir=\"auto\" data-snippet-clipboard-copy-content=\"import requests, zipfile, io\n\ncoca_sample_url = 'http://corpus.byu.edu/cocatext/samples/text.zip'\nzip_file = zipfile.ZipFile(io.BytesIO(requests.get(coca_sample_url).content))\n\ndocument_df = pd.DataFrame(\n [{'text': zip_file.open(fn).read().decode('utf-8'),\n 'category': 'SPOKEN'}\n for fn in zip_file.filelist if fn.filename.startswith('w_spok')][:2]\n + [{'text': zip_file.open(fn).read().decode('utf-8'),\n 'category': 'FICTION'}\n for fn in zip_file.filelist if fn.filename.startswith('w_fic')][:2])\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003erequests\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ezipfile\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eio\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003ecoca_sample_url\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'http://corpus.byu.edu/cocatext/samples/text.zip'\u003c/span\u003e\n\u003cspan class=\"pl-s1\"\u003ezip_file\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ezipfile\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eZipFile\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eio\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eBytesIO\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003erequests\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecoca_sample_url\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003econtent\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003edocument_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eDataFrame\u003c/span\u003e(\n [{\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ezip_file\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eread\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003edecode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e),\n \u003cspan class=\"pl-s\"\u003e'category'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'SPOKEN'\u003c/span\u003e}\n \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ezip_file\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efilelist\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efilename\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003estartswith\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'w_spok'\u003c/span\u003e)][:\u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e]\n \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e [{\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ezip_file\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eread\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003edecode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e),\n \u003cspan class=\"pl-s\"\u003e'category'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'FICTION'\u003c/span\u003e}\n \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ezip_file\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efilelist\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003efilename\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003estartswith\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'w_fic'\u003c/span\u003e)][:\u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e])\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAnd we'll pass the \u003ccode\u003edocument_df\u003c/code\u003e dataframe into \u003ccode\u003eTermCategoryFrequencies\u003c/code\u003e via the \u003ccode\u003edocument_category_df\u003c/code\u003e\nparameter. Ensure the dataframe has two columns, 'text' and 'category'. Afterward, we can\ncall \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e (or your visualization function of choice) normally.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"doc_term_cat_freq = st.TermCategoryFrequencies(df, document_category_df=document_df)\n\nhtml = st.produce_scattertext_explorer(\n doc_term_cat_freq,\n category='SPOKEN',\n category_name='Spoken',\n not_category_name='Fiction',\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003edoc_term_cat_freq\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eTermCategoryFrequencies\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003edocument_category_df\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003edocument_df\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edoc_term_cat_freq\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'SPOKEN'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Spoken'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Fiction'\u003c/span\u003e,\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing query-based categorical differences\u003c/h3\u003e\u003ca id=\"user-content-visualizing-query-based-categorical-differences\" class=\"anchor\" aria-label=\"Permalink: Visualizing query-based categorical differences\" href=\"#visualizing-query-based-categorical-differences\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWord representations have recently become a hot topic in NLP. 
While lots of work has been done visualizing\nhow terms relate to one another given their scores\n(e.g., \u003ca href=\"http://projector.tensorflow.org/\" rel=\"nofollow\"\u003ehttp://projector.tensorflow.org/\u003c/a\u003e),\nnone to my knowledge has been done visualizing how we can use these to examine how\ndocument categories differ.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn this example given a query term, \"jobs\", we can see how Republicans and\nDemocrats talk about it differently.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn this configuration of Scattertext, words are colored by their similarity to a query phrase.\u003cbr\u003e\nThis is done using \u003ca href=\"https://spacy.io/\" rel=\"nofollow\"\u003espaCy\u003c/a\u003e-provided GloVe word vectors (trained on\nthe Common Crawl corpus). The cosine distance between vectors is used,\nwith mean vectors used for phrases.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe calculation of the most similar terms associated with each category is a simple heuristic. First,\nsets of terms closely associated with a category are found. Second, these terms are ranked\nbased on their similarity to the query, and the top rank terms are displayed to the right of the\nscatterplot.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eA term is considered associated if its p-value is less than 0.05. P-values are\ndetermined using Monroe et al. (2008)'s difference in the weighted log-odds-ratios with an\nuninformative Dirichlet prior. This is the only model-based method discussed in Monroe et al.\nthat does not rely on a large, in-domain background corpus. Since we are scoring\nbigrams in addition to the unigrams scored by Monroe, the size of the corpus would have to be larger\nto have high enough bigram counts for proper penalization. 
This function\nrelies on the Dirichlet distribution's parameter alpha, a vector, which is uniformly set to 0.01.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eHere is the code to produce such a visualization.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; from scattertext import word_similarity_explorer\n\u0026gt;\u0026gt;\u0026gt; html = word_similarity_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... target_term='jobs',\n... minimum_term_frequency=5,\n... pmi_threshold_coefficient=4,\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'],\n... alpha=0.01,\n... max_p_val=0.05,\n... save_svg_button=True)\n\u0026gt;\u0026gt;\u0026gt; open(\u0026quot;Convention-Visualization-Jobs.html\u0026quot;, 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; from scattertext import word_similarity_explorer\n\u0026gt;\u0026gt;\u0026gt; html = word_similarity_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... target_term='jobs',\n... minimum_term_frequency=5,\n... pmi_threshold_coefficient=4,\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'],\n... alpha=0.01,\n... max_p_val=0.05,\n... 
save_svg_button=True)\n\u0026gt;\u0026gt;\u0026gt; open(\"Convention-Visualization-Jobs.html\", 'wb').write(html.encode('utf-8'))\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/Convention-Visualization-Jobs.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/bbe72fe07172e1c0edeb0ef26a30147847d86f7158d0564a4daf7ac61fbc8741/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e2d56697375616c697a6174696f6e2d4a6f62732e706e67\" alt=\"Convention-Visualization-Jobs.html\" data-canonical-src=\"https://jasonkessler.github.io/Convention-Visualization-Jobs.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch4 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eDeveloping and using bespoke word representations\u003c/h4\u003e\u003ca id=\"user-content-developing-and-using-bespoke-word-representations\" class=\"anchor\" aria-label=\"Permalink: Developing and using bespoke word representations\" href=\"#developing-and-using-bespoke-word-representations\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eScattertext can interface with Gensim Word2Vec models. 
For example, here's a snippet from \u003ccode\u003edemo_gensim_similarity.py\u003c/code\u003e\nwhich illustrates how to train and use a word2vec model on a corpus. Note the similarities produced\nreflect quirks of the corpus, e.g., \"8\" tends to refer to the 8% unemployment rate at the time of the\nconvention.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import spacy\nfrom gensim.models import word2vec\nfrom scattertext import SampleCorpora, word_similarity_explorer_gensim, Word2VecFromParsedCorpus\nfrom scattertext.CorpusFromParsedDocuments import CorpusFromParsedDocuments\n\nnlp = spacy.en.English()\nconvention_df = SampleCorpora.ConventionData2012.get_data()\nconvention_df['parsed'] = convention_df.text.apply(nlp)\ncorpus = CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parsed').build()\nmodel = word2vec.Word2Vec(size=300,\n alpha=0.025,\n window=5,\n min_count=5,\n max_vocab_size=None,\n sample=0,\n seed=1,\n workers=1,\n min_alpha=0.0001,\n sg=1,\n hs=1,\n negative=0,\n cbow_mean=0,\n iter=1,\n null_word=0,\n trim_rule=None,\n sorted_vocab=1)\nhtml = word_similarity_explorer_gensim(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n target_term='jobs',\n minimum_term_frequency=5,\n pmi_threshold_coefficient=4,\n width_in_pixels=1000,\n metadata=convention_df['speaker'],\n word2vec=Word2VecFromParsedCorpus(corpus, model).train(),\n max_p_val=0.05,\n save_svg_button=True)\nopen('./demo_gensim_similarity.html', 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espacy\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003egensim\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003emodels\u003c/span\u003e \u003cspan 
class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eword2vec\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eSampleCorpora\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eword_similarity_explorer_gensim\u003c/span\u003e, \u003cspan class=\"pl-v\"\u003eWord2VecFromParsedCorpus\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e.\u003cspan class=\"pl-v\"\u003eCorpusFromParsedDocuments\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eCorpusFromParsedDocuments\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espacy\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003een\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eEnglish\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'parsed'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan 
class=\"pl-s1\"\u003econvention_df\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parsed'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003emodel\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eword2vec\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eWord2Vec\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esize\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e300\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ealpha\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0.025\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewindow\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e5\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emin_count\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e5\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emax_vocab_size\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eNone\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esample\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eseed\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eworkers\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003emin_alpha\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0.0001\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esg\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ehs\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enegative\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecbow_mean\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eiter\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enull_word\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etrim_rule\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eNone\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esorted_vocab\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eword_similarity_explorer_gensim\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etarget_term\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'jobs'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eminimum_term_frequency\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e5\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003epmi_threshold_coefficient\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e4\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eword2vec\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-en\"\u003eWord2VecFromParsedCorpus\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003emodel\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003etrain\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003emax_p_val\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0.05\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esave_svg_button\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e)\n\u003cspan class=\"pl-en\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'./demo_gensim_similarity.html'\u003c/span\u003e, \u003cspan 
class=\"pl-s\"\u003e'wb'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ewrite\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eencode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e))\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eHow Democrats and Republicans talked differently about \"jobs\" in their 2012 convention speeches.\n\u003ca href=\"https://jasonkessler.github.io/demo_gensim_similarity.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/0755b8c2827ae7943593be421eca3c9050bdab4418b7de9ff7f06f3ac581cccf/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f67656e73696d5f73696d696c61726974792e706e67\" alt=\"Convention-Visualization-Jobs.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_gensim_similarity.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing any kind of term score\u003c/h3\u003e\u003ca id=\"user-content-visualizing-any-kind-of-term-score\" class=\"anchor\" aria-label=\"Permalink: Visualizing any kind of term score\" href=\"#visualizing-any-kind-of-term-score\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe can use Scattertext to visualize alternative types of word scores, and ensure that 0 scores are greyed out. Use\nthe \u003ccode\u003esparse_explorer\u003c/code\u003e function to accomplish this, and see its source code for more details.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; from sklearn.linear_model import Lasso\n\u0026gt;\u0026gt;\u0026gt; from scattertext import sparse_explorer\n\u0026gt;\u0026gt;\u0026gt; html = sparse_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... scores = corpus.get_regression_coefs('democrat', Lasso(max_iter=10000)),\n... minimum_term_frequency=5,\n... pmi_threshold_coefficient=4,\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'])\n\u0026gt;\u0026gt;\u0026gt; open('./Convention-Visualization-Sparse.html', 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; from sklearn.linear_model import Lasso\n\u0026gt;\u0026gt;\u0026gt; from scattertext import sparse_explorer\n\u0026gt;\u0026gt;\u0026gt; html = sparse_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... scores = corpus.get_regression_coefs('democrat', Lasso(max_iter=10000)),\n... minimum_term_frequency=5,\n... pmi_threshold_coefficient=4,\n... width_in_pixels=1000,\n... 
metadata=convention_df['speaker'])\n\u0026gt;\u0026gt;\u0026gt; open('./Convention-Visualization-Sparse.html', 'wb').write(html.encode('utf-8'))\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/Convention-Visualization-Sparse.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/d48e52dae93e96541a7a2f20e30f35c4d0dcb72f8de6fa3509ead651209bd710/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e2d56697375616c697a6174696f6e2d5370617273652e706e67\" alt=\"Convention-Visualization-Sparse.html\" data-canonical-src=\"https://jasonkessler.github.io/Convention-Visualization-Sparse.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCustom term positions\u003c/h3\u003e\u003ca id=\"user-content-custom-term-positions\" class=\"anchor\" aria-label=\"Permalink: Custom term positions\" href=\"#custom-term-positions\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eYou can also use custom term positions and axis labels. 
For example, you can base terms' y-axis\npositions on a regression coefficient and their x-axis on term frequency and label the axes\naccordingly. The one catch is that axis positions must be scaled between 0 and 1.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, let's define two scaling functions: \u003ccode\u003escale\u003c/code\u003e to project positive values to [0,1], and\n\u003ccode\u003ezero_centered_scale\u003c/code\u003e to project real values to [0,1], with negative values always \u0026lt;0.5, and\npositive values always \u0026gt;0.5.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; def scale(ar):\n... return (ar - ar.min()) / (ar.max() - ar.min())\n...\n\u0026gt;\u0026gt;\u0026gt; def zero_centered_scale(ar):\n... ar[ar \u0026gt; 0] = scale(ar[ar \u0026gt; 0])\n... ar[ar \u0026lt; 0] = -scale(-ar[ar \u0026lt; 0])\n... return (ar + 1) / 2.\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; def scale(ar):\n... return (ar - ar.min()) / (ar.max() - ar.min())\n...\n\u0026gt;\u0026gt;\u0026gt; def zero_centered_scale(ar):\n... ar[ar \u0026gt; 0] = scale(ar[ar \u0026gt; 0])\n... ar[ar \u0026lt; 0] = -scale(-ar[ar \u0026lt; 0])\n... return (ar + 1) / 2.\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, let's compute and scale term frequencies and L2-penalized regression coefficients. 
We'll\nhang on to the original coefficients and allow users to view them by mousing over terms.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; from sklearn.linear_model import LogisticRegression\n\u0026gt;\u0026gt;\u0026gt; import numpy as np\n\u0026gt;\u0026gt;\u0026gt;\n\u0026gt;\u0026gt;\u0026gt; frequencies_scaled = scale(np.log(term_freq_df.sum(axis=1).values))\n\u0026gt;\u0026gt;\u0026gt; scores = corpus.get_logreg_coefs('democrat',\n... LogisticRegression(penalty='l2', C=10, max_iter=10000, n_jobs=-1))\n\u0026gt;\u0026gt;\u0026gt; scores_scaled = zero_centered_scale(scores)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; from sklearn.linear_model import LogisticRegression\n\u0026gt;\u0026gt;\u0026gt; import numpy as np\n\u0026gt;\u0026gt;\u0026gt;\n\u0026gt;\u0026gt;\u0026gt; frequencies_scaled = scale(np.log(term_freq_df.sum(axis=1).values))\n\u0026gt;\u0026gt;\u0026gt; scores = corpus.get_logreg_coefs('democrat',\n... LogisticRegression(penalty='l2', C=10, max_iter=10000, n_jobs=-1))\n\u0026gt;\u0026gt;\u0026gt; scores_scaled = zero_centered_scale(scores)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFinally, we can write the visualization. 
Note the use of the \u003ccode\u003ex_coords\u003c/code\u003e and \u003ccode\u003ey_coords\u003c/code\u003e\nparameters to store the respective coordinates, the \u003ccode\u003escores\u003c/code\u003e and \u003ccode\u003esort_by_dist\u003c/code\u003e arguments\nto register the original coefficients and use them to rank the terms in the right-hand\nlist, and the \u003ccode\u003ex_label\u003c/code\u003e and \u003ccode\u003ey_label\u003c/code\u003e arguments to label axes.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; html = produce_scattertext_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... minimum_term_frequency=5,\n... pmi_threshold_coefficient=4,\n... width_in_pixels=1000,\n... x_coords=frequencies_scaled,\n... y_coords=scores_scaled,\n... scores=scores,\n... sort_by_dist=False,\n... metadata=convention_df['speaker'],\n... x_label='Log frequency',\n... y_label='L2-penalized logistic regression coef')\n\u0026gt;\u0026gt;\u0026gt; open('demo_custom_coordinates.html', 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; html = produce_scattertext_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... minimum_term_frequency=5,\n... pmi_threshold_coefficient=4,\n... width_in_pixels=1000,\n... x_coords=frequencies_scaled,\n... y_coords=scores_scaled,\n... scores=scores,\n... sort_by_dist=False,\n... metadata=convention_df['speaker'],\n... x_label='Log frequency',\n... 
y_label='L2-penalized logistic regression coef')\n\u0026gt;\u0026gt;\u0026gt; open('demo_custom_coordinates.html', 'wb').write(html.encode('utf-8'))\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_custom_coordinates.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/0b5bc3e00051a092c9dd715486f8eeae2703783cae38cfc21b1f759f83a8e1cb/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f637573746f6d5f636f6f7264696e617465732e706e67\" alt=\"demo_custom_coordinates.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_custom_coordinates.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eEmoji analysis\u003c/h3\u003e\u003ca id=\"user-content-emoji-analysis\" class=\"anchor\" aria-label=\"Permalink: Emoji analysis\" href=\"#emoji-analysis\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe Emoji analysis capability displays a chart of the category-specific distribution\nof Emoji. Let's look at a new corpus, a set of tweets. 
We'll build a visualization\nshowing how men and women use emoji differently.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote: the following example is implemented in \u003ccode\u003edemo_emoji.py\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, we'll load the dataset and parse it using NLTK's tweet tokenizer. Note, install NLTK\nbefore running this example. It will take some time for the dataset to download.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import nltk, urllib.request, io, agefromname, zipfile\nimport scattertext as st\nimport pandas as pd\n\nwith zipfile.ZipFile(io.BytesIO(urllib.request.urlopen(\n 'http://followthehashtag.com/content/uploads/USA-Geolocated-tweets-free-dataset-Followthehashtag.zip'\n).read())) as zf:\n df = pd.read_excel(zf.open('dashboard_x_usa_x_filter_nativeretweets.xlsx'))\n\nnlp = st.tweet_tokenzier_factory(nltk.tokenize.TweetTokenizer())\ndf['parse'] = df['Tweet content'].apply(nlp)\n\ndf.iloc[0]\n'''\nTweet Id 721318437075685382\nDate 2016-04-16\nHour 12:44\nUser Name Bill Schulhoff\nNickname BillSchulhoff\nBio Husband,Dad,GrandDad,Ordained Minister, Umpire...\nTweet content Wind 3.2 mph NNE. Barometer 30.20 in, Rising s...\nFavs NaN\nRTs NaN\nLatitude 40.7603\nLongitude -72.9547\nCountry US\nPlace (as appears on Bio) East Patchogue, NY\nProfile picture http://pbs.twimg.com/profile_images/3788000007...\nFollowers 386\nFollowing 705\nListed 24\nTweet language (ISO 639-1) en\nTweet Url http://www.twitter.com/BillSchulhoff/status/72...\nparse Wind 3.2 mph NNE. 
Barometer 30.20 in, Rising s...\nName: 0, dtype: object\n'''\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enltk\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eurllib\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003erequest\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eio\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eagefromname\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ezipfile\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epandas\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e\n\n\u003cspan class=\"pl-k\"\u003ewith\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ezipfile\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eZipFile\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eio\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eBytesIO\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eurllib\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003erequest\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eurlopen\u003c/span\u003e(\n \u003cspan class=\"pl-s\"\u003e'http://followthehashtag.com/content/uploads/USA-Geolocated-tweets-free-dataset-Followthehashtag.zip'\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003eread\u003c/span\u003e())) \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ezf\u003c/span\u003e:\n \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eread_excel\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ezf\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'dashboard_x_usa_x_filter_nativeretweets.xlsx'\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etweet_tokenzier_factory\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enltk\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etokenize\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eTweetTokenizer\u003c/span\u003e())\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Tweet content'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e]\n\u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eTweet Id 721318437075685382\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eDate 2016-04-16\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eHour 12:44\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eUser Name Bill Schulhoff\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eNickname BillSchulhoff\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eBio Husband,Dad,GrandDad,Ordained Minister, Umpire...\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eTweet content Wind 3.2 mph NNE. 
Barometer 30.20 in, Rising s...\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eFavs NaN\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eRTs NaN\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eLatitude 40.7603\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eLongitude -72.9547\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eCountry US\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003ePlace (as appears on Bio) East Patchogue, NY\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eProfile picture http://pbs.twimg.com/profile_images/3788000007...\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eFollowers 386\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eFollowing 705\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eListed 24\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eTweet language (ISO 639-1) en\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eTweet Url http://www.twitter.com/BillSchulhoff/status/72...\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eparse Wind 3.2 mph NNE. Barometer 30.20 in, Rising s...\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eName: 0, dtype: object\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we'll use the \u003ca href=\"https://github.com/JasonKessler/agefromname\"\u003eAgeFromName\u003c/a\u003e package to find the probabilities of the\ngender of\neach user given their first name. 
First, we'll find a dataframe indexed on first names\nthat contains the probability that someone with that first name is male (\u003ccode\u003emale_prob\u003c/code\u003e).\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"male_prob = agefromname.AgeFromName().get_all_name_male_prob()\nmale_prob.iloc[0]\n'''\nhi 1.00000\nlo 0.95741\nprob 1.00000\nName: aaban, dtype: float64\n'''\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003emale_prob\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eagefromname\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eAgeFromName\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_all_name_male_prob\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003emale_prob\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eiloc\u003c/span\u003e[\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e]\n\u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003ehi 1.00000\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003elo 0.95741\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eprob 1.00000\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003eName: aaban, dtype: float64\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we'll extract the first names of each user, and use the \u003ccode\u003emale_prob\u003c/code\u003e data frame\nto find users whose names indicate there is at least a 90% chance they are either male or female,\nlabel those users, and create a new data frame \u003ccode\u003edf_mf\u003c/code\u003e with only those users.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"df['first_name'] = df['User Name'].apply(lambda x: x.split()[0].lower() if type(x) == str 
and len(x.split()) \u0026gt; 0 else x)\ndf_aug = pd.merge(df, male_prob, left_on='first_name', right_index=True)\ndf_aug['gender'] = df_aug['prob'].apply(lambda x: 'm' if x \u0026gt; 0.9 else 'f' if x \u0026lt; 0.1 else '?')\ndf_mf = df_aug[df_aug['gender'].isin(['m', 'f'])]\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'first_name'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'User Name'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e: \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esplit\u003c/span\u003e()[\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003elower\u003c/span\u003e() \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-en\"\u003etype\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e) \u003cspan class=\"pl-c1\"\u003e==\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003estr\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003eand\u003c/span\u003e \u003cspan class=\"pl-en\"\u003elen\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esplit\u003c/span\u003e()) \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eelse\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003edf_aug\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003epd\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003emerge\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003emale_prob\u003c/span\u003e, \u003cspan 
class=\"pl-s1\"\u003eleft_on\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'first_name'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eright_index\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003edf_aug\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'gender'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf_aug\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'prob'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'm'\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026gt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.9\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eelse\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'f'\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eif\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e\u0026lt;\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e0.1\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eelse\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'?'\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003edf_mf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf_aug\u003c/span\u003e[\u003cspan class=\"pl-s1\"\u003edf_aug\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'gender'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eisin\u003c/span\u003e([\u003cspan class=\"pl-s\"\u003e'm'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'f'\u003c/span\u003e])]\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eThe key to this analysis is to construct a 
corpus using only the emoji\nextractor \u003ccode\u003est.FeatsFromSpacyDocOnlyEmoji\u003c/code\u003e which builds a corpus only from\nemoji and not from anything else.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"corpus = st.CorpusFromParsedDocuments(\n df_mf,\n parsed_col='parse',\n category_col='gender',\n feats_from_spacy_doc=st.FeatsFromSpacyDocOnlyEmoji()\n).build()\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003edf_mf\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'gender'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003efeats_from_spacy_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFeatsFromSpacyDocOnlyEmoji\u003c/span\u003e()\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we'll run this through a standard \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e visualization\ngeneration.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_scattertext_explorer(\n corpus,\n category='f',\n category_name='Female',\n not_category_name='Male',\n use_full_doc=True,\n term_ranker=st.OncePerDocFrequencyRanker,\n 
sort_by_dist=False,\n metadata=(df_mf['User Name']\n + ' (@' + df_mf['Nickname'] + ') '\n + df_mf['Date'].astype(str)),\n width_in_pixels=1000\n)\nopen(\u0026quot;EmojiGender.html\u0026quot;, 'wb').write(html.encode('utf-8'))\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'f'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Female'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Male'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_full_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eterm_ranker\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eOncePerDocFrequencyRanker\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esort_by_dist\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edf_mf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'User Name'\u003c/span\u003e]\n \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e' (@'\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf_mf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Nickname'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e') '\u003c/span\u003e\n \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003edf_mf\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Date'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eastype\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003estr\u003c/span\u003e)),\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e\n)\n\u003cspan class=\"pl-en\"\u003eopen\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e\"EmojiGender.html\"\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'wb'\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003ewrite\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eencode\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'utf-8'\u003c/span\u003e))\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/EmojiGender.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/839142c6951d1b32dfbf5c2754266ac5c73338a28cd97449dc674e764097feba/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f456d6f6a6947656e6465722e706e67\" alt=\"EmojiGender.html\" data-canonical-src=\"https://jasonkessler.github.io/EmojiGender.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing SentencePiece Tokens\u003c/h3\u003e\u003ca id=\"user-content-visualizing-sentencepiece-tokens\" class=\"anchor\" aria-label=\"Permalink: Visualizing SentencePiece Tokens\" href=\"#visualizing-sentencepiece-tokens\"\u003e\u003csvg 
class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://github.com/google/sentencepiece\"\u003eSentencePiece\u003c/a\u003e tokenization is a subword tokenization technique which\nrelies on a language-model to produce optimized tokenization. It has been used in large, transformer-based contextual\nlanguage models.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eEnsure to run \u003ccode\u003e$ pip install sentencepiece\u003c/code\u003e before running this example.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, let's load the political convention data set as normal.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import tempfile\nimport re\nimport scattertext as st\n\nconvention_df = st.SampleCorpora.ConventionData2012.get_data()\nconvention_df['parse'] = convention_df.text.apply(st.whitespace_nlp_with_sentences)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003etempfile\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ere\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, let's train a SentencePiece tokenizer based on this data. The \u003ccode\u003etrain_sentence_piece_tokenizer\u003c/code\u003e function trains\na SentencePieceProcessor on the data set and returns it. 
You can of course use any SentencePieceProcessor.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"\ndef train_sentence_piece_tokenizer(documents, vocab_size):\n '''\n :param documents: list-like, a list of str documents\n :vocab_size int: the size of the vocabulary to output\n \n :return sentencepiece.SentencePieceProcessor\n '''\n import sentencepiece as spm\n sp = None\n with tempfile.NamedTemporaryFile(delete=True) as tempf:\n with tempfile.NamedTemporaryFile(delete=True) as tempm:\n tempf.write(('\\n'.join(documents)).encode())\n spm.SentencePieceTrainer.Train(\n '--input=%s --model_prefix=%s --vocab_size=%s' % (tempf.name, tempm.name, vocab_size)\n )\n sp = spm.SentencePieceProcessor()\n sp.load(tempm.name + '.model')\n return sp\n\n\nsp = train_sentence_piece_tokenizer(convention_df.text.values, vocab_size=2000)\n\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003edef\u003c/span\u003e \u003cspan class=\"pl-en\"\u003etrain_sentence_piece_tokenizer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edocuments\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003evocab_size\u003c/span\u003e):\n \u003cspan class=\"pl-s\"\u003e'''\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e :param documents: list-like, a list of str documents\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e :vocab_size int: the size of the vocabulary to output\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e \u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e :return sentencepiece.SentencePieceProcessor\u003c/span\u003e\n\u003cspan class=\"pl-s\"\u003e '''\u003c/span\u003e\n \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esentencepiece\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espm\u003c/span\u003e\n \u003cspan class=\"pl-s1\"\u003esp\u003c/span\u003e \u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003eNone\u003c/span\u003e\n \u003cspan class=\"pl-k\"\u003ewith\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003etempfile\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eNamedTemporaryFile\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edelete\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e) \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003etempf\u003c/span\u003e:\n \u003cspan class=\"pl-k\"\u003ewith\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003etempfile\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eNamedTemporaryFile\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edelete\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e) \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003etempm\u003c/span\u003e:\n \u003cspan class=\"pl-s1\"\u003etempf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewrite\u003c/span\u003e((\u003cspan class=\"pl-s\"\u003e'\u003cspan class=\"pl-cce\"\u003e\\n\u003c/span\u003e'\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ejoin\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003edocuments\u003c/span\u003e)).\u003cspan class=\"pl-c1\"\u003eencode\u003c/span\u003e())\n \u003cspan class=\"pl-s1\"\u003espm\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSentencePieceTrainer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eTrain\u003c/span\u003e(\n \u003cspan class=\"pl-s\"\u003e'--input=%s --model_prefix=%s --vocab_size=%s'\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e%\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003etempf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ename\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003etempm\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ename\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003evocab_size\u003c/span\u003e)\n )\n 
\u003cspan class=\"pl-s1\"\u003esp\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003espm\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSentencePieceProcessor\u003c/span\u003e()\n \u003cspan class=\"pl-s1\"\u003esp\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eload\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003etempm\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ename\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e'.model'\u003c/span\u003e)\n \u003cspan class=\"pl-k\"\u003ereturn\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esp\u003c/span\u003e\n\n\n\u003cspan class=\"pl-s1\"\u003esp\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003etrain_sentence_piece_tokenizer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etext\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evalues\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003evocab_size\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e2000\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, let's add the SentencePiece tokens as metadata when creating our corpus. In order to do this, pass\na \u003ccode\u003eFeatsFromSentencePiece\u003c/code\u003e instance into the \u003ccode\u003efeats_from_spacy_doc\u003c/code\u003e parameter. 
Pass the SentencePieceProcessor into\nthe constructor.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"corpus = st.CorpusFromParsedDocuments(convention_df,\n parsed_col='parse',\n category_col='party',\n feats_from_spacy_doc=st.FeatsFromSentencePiece(sp)).build()\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003efeats_from_spacy_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFeatsFromSentencePiece\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esp\u003c/span\u003e)).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNow we can create the SentencePiece token scatter plot.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_scattertext_explorer(\n corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n sort_by_dist=False,\n metadata=convention_df['party'] + ': ' + convention_df['speaker'],\n term_scorer=st.RankDifference(),\n transform=st.Scalers.dense_rank,\n 
use_non_text_features=True,\n use_full_doc=True,\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003esort_by_dist\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s\"\u003e': '\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e+\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003eterm_scorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDifference\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003etransform\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eScalers\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edense_rank\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_non_text_features\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_full_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_sentence_piece.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_sentence_piece.png\" alt=\"demo_sentence_piece.html\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing scikit-learn text classification weights\u003c/h3\u003e\u003ca id=\"user-content-visualizing-scikit-learn-text-classification-weights\" class=\"anchor\" aria-label=\"Permalink: Visualizing scikit-learn text classification weights\" href=\"#visualizing-scikit-learn-text-classification-weights\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eSuppose you'd like to audit or better understand\nweights or importances given to bag-of-words features\nby a classifier.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIt's easy to use Scattertext to do so, if you use a Scikit-learn-style classifier.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFor example, the \u003ca href=\"http://contrib.scikit-learn.org/lightning/\" rel=\"nofollow\"\u003eLightning\u003c/a\u003e package makes available\nhigh-performance linear classifiers which have Scikit-compatible interfaces.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFirst, let's import \u003ccode\u003esklearn\u003c/code\u003e's text feature extraction classes, the 20 Newsgroup\ncorpus, Lightning's Primal Coordinate Descent classifier, and Scattertext. We'll also\nfetch the training portion of the Newsgroup corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"from lightning.classification import CDClassifier\nfrom sklearn.datasets import fetch_20newsgroups\nfrom sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n\nimport scattertext as st\n\nnewsgroups_train = fetch_20newsgroups(\n subset='train',\n remove=('headers', 'footers', 'quotes')\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003elightning\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003eclassification\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eCDClassifier\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003edatasets\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003efetch_20newsgroups\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003efeature_extraction\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003etext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eCountVectorizer\u003c/span\u003e, \u003cspan class=\"pl-v\"\u003eTfidfVectorizer\u003c/span\u003e\n\n\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003efetch_20newsgroups\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003esubset\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'train'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eremove\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'headers'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'footers'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'quotes'\u003c/span\u003e)\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we'll tokenize our corpus twice. Once into tfidf features\nwhich will be used to train the classifier, and another time into\nngram counts that will be used by Scattertext. 
It's important that\nboth vectorizers share the same vocabulary, since we'll need to apply the\nweight vector from the model onto our Scattertext Corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"vectorizer = TfidfVectorizer()\ntfidf_X = vectorizer.fit_transform(newsgroups_train.data)\ncount_vectorizer = CountVectorizer(vocabulary=vectorizer.vocabulary_)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003evectorizer\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eTfidfVectorizer\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003etfidf_X\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003evectorizer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efit_transform\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edata\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003ecount_vectorizer\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eCountVectorizer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003evocabulary\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003evectorizer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evocabulary_\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we use the \u003ccode\u003eCorpusFromScikit\u003c/code\u003e factory to build a Scattertext Corpus object.\nEnsure the \u003ccode\u003eX\u003c/code\u003e parameter is a document-by-feature matrix. The argument to the\n\u003ccode\u003ey\u003c/code\u003e parameter is an array of class labels. Each label is an integer representing\na different news group. The \u003ccode\u003efeature_vocabulary\u003c/code\u003e is the vocabulary used by the\nvectorizers. 
The \u003ccode\u003ecategory_names\u003c/code\u003e are a list of the 20 newsgroup names which\nserve as a class-label list. The \u003ccode\u003eraw_texts\u003c/code\u003e is a list of the text of newsgroup texts.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"corpus = st.CorpusFromScikit(\n X=count_vectorizer.fit_transform(newsgroups_train.data),\n y=newsgroups_train.target,\n feature_vocabulary=vectorizer.vocabulary_,\n category_names=newsgroups_train.target_names,\n raw_texts=newsgroups_train.data\n).build()\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromScikit\u003c/span\u003e(\n \u003cspan class=\"pl-c1\"\u003eX\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003ecount_vectorizer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efit_transform\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edata\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003ey\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etarget\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003efeature_vocabulary\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003evectorizer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003evocabulary_\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_names\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etarget_names\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003eraw_texts\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edata\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNow, we can train the model on \u003ccode\u003etfidf_X\u003c/code\u003e and the categorical response variable,\nand capture feature weights for category 0 (\"alt.atheism\").\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"clf = CDClassifier(penalty=\u0026quot;l1/l2\u0026quot;,\n loss=\u0026quot;squared_hinge\u0026quot;,\n multiclass=True,\n max_iter=20,\n alpha=1e-4,\n C=1.0 / tfidf_X.shape[0],\n tol=1e-3)\nclf.fit(tfidf_X, newsgroups_train.target)\nterm_scores = clf.coef_[0]\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003eCDClassifier\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003epenalty\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"l1/l2\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eloss\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e\"squared_hinge\"\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emulticlass\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emax_iter\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e20\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ealpha\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1e-4\u003c/span\u003e,\n \u003cspan 
class=\"pl-c1\"\u003eC\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1.0\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e/\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003etfidf_X\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eshape\u003c/span\u003e[\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003etol\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1e-3\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efit\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003etfidf_X\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etarget\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003eterm_scores\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecoef_\u003c/span\u003e[\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e]\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFinally, we can create a Scattertext plot. 
We'll use the Monroe-style visualization, and automatically\nselect around 4000 terms that encompass the set of frequent terms, terms with high absolute scores,\nand terms that are characteristic of the corpus.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"html = st.produce_frequency_explorer(\n corpus,\n 'alt.atheism',\n scores=term_scores,\n use_term_significance=False,\n terms_to_include=st.AutoTermSelector.get_selected_terms(corpus, term_scores, 4000),\n metadata=['/'.join(fn.split('/')[-2:]) for fn in newsgroups_train.filenames]\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_frequency_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'alt.atheism'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003escores\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eterm_scores\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_term_significance\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eFalse\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eterms_to_include\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eAutoTermSelector\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_selected_terms\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eterm_scores\u003c/span\u003e, \u003cspan class=\"pl-c1\"\u003e4000\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan 
class=\"pl-s\"\u003e'/'\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ejoin\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003esplit\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'/'\u003c/span\u003e)[\u003cspan class=\"pl-c1\"\u003e-\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e2\u003c/span\u003e:]) \u003cspan class=\"pl-k\"\u003efor\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003efn\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003ein\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003enewsgroups_train\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003efilenames\u003c/span\u003e]\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_sklearn.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_sklearn.png\" alt=\"demo_sklearn.html\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eLet's take a look at the performance of the classifier:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"newsgroups_test = fetch_20newsgroups(subset='test',\n remove=('headers', 'footers', 'quotes'))\nX_test = vectorizer.transform(newsgroups_test.data)\npred = clf.predict(X_test)\nf1 = f1_score(pred, newsgroups_test.target, average='micro')\nprint(\u0026quot;Microaveraged F1 score\u0026quot;, f1)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003enewsgroups_test\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003efetch_20newsgroups\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esubset\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'test'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eremove\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e'headers'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'footers'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'quotes'\u003c/span\u003e))\n\u003cspan class=\"pl-v\"\u003eX_test\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003evectorizer\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etransform\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003enewsgroups_test\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003edata\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003epred\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eclf\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003epredict\u003c/span\u003e(\u003cspan class=\"pl-v\"\u003eX_test\u003c/span\u003e)\n\u003cspan class=\"pl-s1\"\u003ef1\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-en\"\u003ef1_score\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003epred\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003enewsgroups_test\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003etarget\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eaverage\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'micro'\u003c/span\u003e)\n\u003cspan class=\"pl-en\"\u003eprint\u003c/span\u003e(\u003cspan class=\"pl-s\"\u003e\"Microaveraged F1 score\"\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ef1\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eMicroaveraged F1 score 0.662108337759. 
Not bad over a ~0.05 baseline.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCreating lexicalized semiotic squares\u003c/h3\u003e\u003ca id=\"user-content-creating-lexicalized-semiotic-squares\" class=\"anchor\" aria-label=\"Permalink: Creating lexicalized semiotic squares\" href=\"#creating-lexicalized-semiotic-squares\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003ePlease see \u003ca href=\"http://www.signosemio.com/greimas/semiotic-square.asp\" rel=\"nofollow\"\u003eSigno\u003c/a\u003e for an\nintroduction to semiotic squares.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSome variants of the semiotic square-creator can be seen in this notebook, which studies\nwords and phrases in headlines that had low or high Facebook engagement and were published by\neither BuzzFeed or the New York\nTimes: [\u003ca href=\"http://nbviewer.jupyter.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb\" rel=\"nofollow\"\u003ehttp://nbviewer.jupyter.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb\u003c/a\u003e]\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe idea behind the semiotic square is to express the relationship 
between two opposing\nconcepts and other concepts within a larger domain of discourse.\nExamples of opposed concepts are life or death, male or female, or, in our example, positive or negative sentiment.\nSemiotic squares are comprised of four \"corners\": the upper two corners are the opposing concepts,\nwhile the bottom corners are the negation of the concepts.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCircumscribing the negation of a concept involves finding everything in the\ndomain of discourse that isn't associated with the concept. For example, in the\nlife-death opposition, one can consider the universe of discourse to be all\nanimate beings, real and hypothetical. The not-alive category will cover dead things,\nbut also hypothetical entities like fictional characters or sentient AIs.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn building lexicalized semiotic squares, we consider concepts to be documents labeled\nin a corpus. Documents, in this setting, can belong to one of three categories: two labels corresponding\nto the opposing concepts, and a neutral category, indicating a document is in the same domain as\nthe opposition, but cannot fall into one of the opposing categories.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn the example below, positive and negative movie reviews are treated as the opposing categories,\nwhile plot descriptions of the same movies are treated as the neutral category.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTerms associated with one of the two opposing categories (relative only to the other) are\nlisted as being associated with that category. Terms associated with a neutral category\n(e.g., not positive) are terms which are associated with the disjunction of the opposite\ncategory and the neutral category. For example, not-positive terms are those most associated\nwith the set of negative reviews and plot descriptions vs. 
positive reviews.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCommon terms among adjacent corners of the square are also listed.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAn HTML-rendered square is accompanied by a scatter plot. Points on the plot are terms.\nThe x-axis is the Z-score of the association to one of the opposed concepts. The y-axis\nis the Z-score of how associated a term is with the neutral set of documents relative to the\nopposed set. A point's red-blue color indicates the term's opposed-association, while\nthe more desaturated a term is, the more it is associated with the neutral set of documents.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eUpdate to version 2.2: terms are colored by their nearest semiotic categories across the eight\ncorresponding radial sectors.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\n\nmovie_df = st.SampleCorpora.RottenTomatoes.get_data()\nmovie_df.category = movie_df.category.apply\n (lambda x: {'rotten': 'Negative', 'fresh': 'Positive', 'plot': 'Plot'}[x])\ncorpus = st.CorpusFromPandas(\n movie_df,\n category_col='category',\n text_col='text',\n nlp=st.whitespace_nlp_with_sentences\n).build().get_unigram_corpus()\n\nsemiotic_square = st.SemioticSquare(\n corpus,\n category_a='Positive',\n category_b='Negative',\n neutral_categories=['Plot'],\n scorer=st.RankDifference(),\n labels={'not_a_and_not_b': 'Plot Descriptions', 'a_and_b': 'Reviews'}\n)\n\nhtml = st.produce_semiotic_square_explorer(semiotic_square,\n category_name='Positive',\n not_category_name='Negative',\n x_label='Fresh-Rotten',\n y_label='Plot-Review',\n neutral_category_name='Plot Description',\n metadata=movie_df['movie_name'])\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003escattertext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eas\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003emovie_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRottenTomatoes\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003emovie_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecategory\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003emovie_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ecategory\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e\n (\u003cspan class=\"pl-k\"\u003elambda\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e: {\u003cspan class=\"pl-s\"\u003e'rotten'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'fresh'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'plot'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Plot'\u003c/span\u003e}[\u003cspan class=\"pl-s1\"\u003ex\u003c/span\u003e])\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromPandas\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003emovie_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'category'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etext_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enlp\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_unigram_corpus\u003c/span\u003e()\n\n\u003cspan class=\"pl-s1\"\u003esemiotic_square\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSemioticSquare\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_a\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_b\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eneutral_categories\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'Plot'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003escorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDifference\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003elabels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e{\u003cspan class=\"pl-s\"\u003e'not_a_and_not_b'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Plot Descriptions'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'a_and_b'\u003c/span\u003e: \u003cspan class=\"pl-s\"\u003e'Reviews'\u003c/span\u003e}\n)\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_semiotic_square_explorer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esemiotic_square\u003c/span\u003e,\n \u003cspan 
class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Positive'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Negative'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ex_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Fresh-Rotten'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ey_label\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Plot-Review'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eneutral_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Plot Description'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003emovie_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'movie_name'\u003c/span\u003e])\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_semiotic.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/d90444553d3659e6515919f00b174504233fc7d85e7e19e999cda1bd6cb35124/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f73656d696f7469635f7371756172655f706c6f742e706e67\" alt=\"semiotic square\" data-canonical-src=\"https://jasonkessler.github.io/semiotic_square_plot.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThere are a number of other types of semiotic square construction functions. 
Again, please\nsee \u003ca href=\"https://nbviewer.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb\" rel=\"nofollow\"\u003ehttps://nbviewer.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb\u003c/a\u003e for\nan overview of these.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eVisualizing Topic Models\u003c/h3\u003e\u003ca id=\"user-content-visualizing-topic-models\" class=\"anchor\" aria-label=\"Permalink: Visualizing Topic Models\" href=\"#visualizing-topic-models\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eA frequently requested feature of Scattertext has been the ability to visualize topic\nmodels. 
While this capability has existed in some forms (e.g., the Empath visualization),\nI've finally gotten around to implementing a concise API for such a visualization.\nThere are three main ways to visualize topic models using Scattertext.\nThe first is the simplest: manually entering topic models and visualizing them.\nThe second uses a Scikit-Learn pipeline to produce the topic models for visualization.\nThe third is a novel topic modeling technique, based on finding terms similar to a\ncustom set of seed terms.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch4 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eManually entered topic models\u003c/h4\u003e\u003ca id=\"user-content-manually-entered-topic-models\" class=\"anchor\" aria-label=\"Permalink: Manually entered topic models\" href=\"#manually-entered-topic-models\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIf you have already created a topic model, simply structure it as a dictionary.\nThis dictionary is keyed on strings, which serve as topic titles and are displayed\nin the main scatterplot. The values are lists of words that belong to that topic. 
The words\nthat are in each topic list are bolded when they appear in a snippet.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that currently, there is no support for keyword scores.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFor example, one might manually enter the following topic models to explore in the Convention\ncorpus:\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"topic_model = {\n 'money': ['money', 'bank', 'banks', 'finances', 'financial', 'loan', 'dollars', 'income'],\n 'jobs': ['jobs', 'workers', 'labor', 'employment', 'worker', 'employee', 'job'],\n 'patriotic': ['america', 'country', 'flag', 'americans', 'patriotism', 'patriotic'],\n 'family': ['mother', 'father', 'mom', 'dad', 'sister', 'brother', 'grandfather', 'grandmother', 'son', 'daughter']\n}\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003etopic_model\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e {\n \u003cspan class=\"pl-s\"\u003e'money'\u003c/span\u003e: [\u003cspan class=\"pl-s\"\u003e'money'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'bank'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'banks'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'finances'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'financial'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'loan'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'dollars'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'income'\u003c/span\u003e],\n \u003cspan class=\"pl-s\"\u003e'jobs'\u003c/span\u003e: [\u003cspan class=\"pl-s\"\u003e'jobs'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'workers'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'labor'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'employment'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'worker'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'employee'\u003c/span\u003e, \u003cspan 
class=\"pl-s\"\u003e'job'\u003c/span\u003e],\n \u003cspan class=\"pl-s\"\u003e'patriotic'\u003c/span\u003e: [\u003cspan class=\"pl-s\"\u003e'america'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'country'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'flag'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'americans'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'patriotism'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'patriotic'\u003c/span\u003e],\n \u003cspan class=\"pl-s\"\u003e'family'\u003c/span\u003e: [\u003cspan class=\"pl-s\"\u003e'mother'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'father'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'mom'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'dad'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'sister'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'brother'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'grandfather'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'grandmother'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'son'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'daughter'\u003c/span\u003e]\n}\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe can use the \u003ccode\u003eFeatsFromTopicModel\u003c/code\u003e class to transform this topic model into one which\ncan be visualized using Scattertext. 
This is used just like any other feature builder,\nand we pass the topic model object into \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"import scattertext as st\n\ntopic_feature_builder = st.FeatsFromTopicModel(topic_model)\n\ntopic_corpus = st.CorpusFromParsedDocuments(\n\tconvention_df,\n\tcategory_col='party',\n\tparsed_col='parse',\n\tfeats_from_spacy_doc=topic_feature_builder\n).build()\n\nhtml = st.produce_scattertext_explorer(\n\ttopic_corpus,\n\tcategory='democrat',\n\tcategory_name='Democratic',\n\tnot_category_name='Republican',\n\twidth_in_pixels=1000,\n\tmetadata=convention_df['speaker'],\n\tuse_non_text_features=True,\n\tuse_full_doc=True,\n\tpmi_threshold_coefficient=0,\n\ttopic_model_term_lists=topic_feature_builder.get_top_model_term_lists()\n)\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003eimport scattertext as st\n\ntopic_feature_builder = st.FeatsFromTopicModel(topic_model)\n\ntopic_corpus = st.CorpusFromParsedDocuments(\n\tconvention_df,\n\tcategory_col='party',\n\tparsed_col='parse',\n\tfeats_from_spacy_doc=topic_feature_builder\n).build()\n\nhtml = st.produce_scattertext_explorer(\n\ttopic_corpus,\n\tcategory='democrat',\n\tcategory_name='Democratic',\n\tnot_category_name='Republican',\n\twidth_in_pixels=1000,\n\tmetadata=convention_df['speaker'],\n\tuse_non_text_features=True,\n\tuse_full_doc=True,\n\tpmi_threshold_coefficient=0,\n\ttopic_model_term_lists=topic_feature_builder.get_top_model_term_lists()\n)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_custom_topic_model.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_custom_topic_model.png\" alt=\"demo_custom_topic_model.html\" style=\"max-width: 
100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch4 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing Scikit-Learn for Topic Modeling\u003c/h4\u003e\u003ca id=\"user-content-using-scikit-learn-for-topic-modeling\" class=\"anchor\" aria-label=\"Permalink: Using Scikit-Learn for Topic Modeling\" href=\"#using-scikit-learn-for-topic-modeling\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eSince topic modeling using document-level co-occurrence generally produces poor results,\nI've added a \u003ccode\u003eSentencesForTopicModeling\u003c/code\u003e class which allows clustering by co-occurrence\nat the sentence level. It requires a \u003ccode\u003eParsedCorpus\u003c/code\u003e object to be passed to its constructor,\nand creates a term-sentence matrix internally.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNext, you can create a topic model dictionary like the one above by passing in a Scikit-Learn\nclustering or dimensionality reduction pipeline. 
The only constraint is the last transformer\nin the pipeline must populate a \u003ccode\u003ecomponents_\u003c/code\u003e attribute.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe \u003ccode\u003enum_terms_per_topic\u003c/code\u003e parameter specifies how many terms should be added to each topic's list.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn the following example, we'll use NMF to cluster a stoplisted, unigram corpus of documents,\nand use the topic model dictionary to create a \u003ccode\u003eFeatsFromTopicModel\u003c/code\u003e, just like before.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that in \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e, we set \u003ccode\u003etopic_model_preview_size\u003c/code\u003e to 20 in order to show\na preview of the first 20 terms in the topic in the snippet view as opposed to the default 10.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"from sklearn.decomposition import NMF\nfrom sklearn.feature_extraction.text import TfidfTransformer\nfrom sklearn.pipeline import Pipeline\n\nconvention_df = st.SampleCorpora.ConventionData2012.get_data()\nconvention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)\n\nunigram_corpus = (st.CorpusFromParsedDocuments(convention_df,\n category_col='party',\n parsed_col='parse')\n .build().get_stoplisted_unigram_corpus())\ntopic_model = st.SentencesForTopicModeling(unigram_corpus).get_topics_from_model(\n Pipeline([\n ('tfidf', TfidfTransformer(sublinear_tf=True)),\n ('nmf', (NMF(n_components=100, alpha=.1, l1_ratio=.5, random_state=0)))\n ]),\n num_terms_per_topic=20\n)\n\ntopic_feature_builder = st.FeatsFromTopicModel(topic_model)\n\ntopic_corpus = st.CorpusFromParsedDocuments(\n convention_df,\n category_col='party',\n parsed_col='parse',\n feats_from_spacy_doc=topic_feature_builder\n).build()\n\nhtml = st.produce_scattertext_explorer(\n 
topic_corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n width_in_pixels=1000,\n metadata=convention_df['speaker'],\n use_non_text_features=True,\n use_full_doc=True,\n pmi_threshold_coefficient=0,\n topic_model_term_lists=topic_feature_builder.get_top_model_term_lists(),\n topic_model_preview_size=20\n)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003edecomposition\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003eNMF\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003efeature_extraction\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003etext\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eTfidfTransformer\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003esklearn\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003epipeline\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003ePipeline\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e].\u003cspan 
class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003eunigram_corpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_stoplisted_unigram_corpus\u003c/span\u003e())\n\u003cspan class=\"pl-s1\"\u003etopic_model\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSentencesForTopicModeling\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eunigram_corpus\u003c/span\u003e).\u003cspan class=\"pl-c1\"\u003eget_topics_from_model\u003c/span\u003e(\n \u003cspan class=\"pl-en\"\u003ePipeline\u003c/span\u003e([\n (\u003cspan class=\"pl-s\"\u003e'tfidf'\u003c/span\u003e, \u003cspan class=\"pl-en\"\u003eTfidfTransformer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esublinear_tf\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e)),\n (\u003cspan class=\"pl-s\"\u003e'nmf'\u003c/span\u003e, (\u003cspan class=\"pl-en\"\u003eNMF\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003en_components\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e100\u003c/span\u003e, \u003cspan 
class=\"pl-s1\"\u003ealpha\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e.1\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003el1_ratio\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e.5\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003erandom_state\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e)))\n ]),\n \u003cspan class=\"pl-s1\"\u003enum_terms_per_topic\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e20\u003c/span\u003e\n)\n\n\u003cspan class=\"pl-s1\"\u003etopic_feature_builder\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFeatsFromTopicModel\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003etopic_model\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003etopic_corpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003efeats_from_spacy_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003etopic_feature_builder\u003c/span\u003e\n).\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e()\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan 
class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_scattertext_explorer\u003c/span\u003e(\n \u003cspan class=\"pl-s1\"\u003etopic_corpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ewidth_in_pixels\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e1000\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'speaker'\u003c/span\u003e],\n \u003cspan class=\"pl-s1\"\u003euse_non_text_features\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003euse_full_doc\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003eTrue\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003epmi_threshold_coefficient\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003etopic_model_term_lists\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003etopic_feature_builder\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_top_model_term_lists\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003etopic_model_preview_size\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e20\u003c/span\u003e\n)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_nmf_topic_model.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/be5122831e504d8d1e6a86f60161cd0b5316c8f49600eb53d73314ac2f01ba6e/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f6e6d665f746f7069635f6d6f64656c2e706e67\" alt=\"demo_nmf_topic_model.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_nmf_topic_model.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch4 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing a Word List to Generate a Series of Topics\u003c/h4\u003e\u003ca id=\"user-content-using-a-word-list-to-generate-a-series-of-topics\" class=\"anchor\" aria-label=\"Permalink: Using a Word List to Generate a Series of Topics\" href=\"#using-a-word-list-to-generate-a-series-of-topics\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eA surprisingly easy way to generate good topic models is to use a term scoring formula\nto find words that are associated with sentences where a seed word 
occurs vs. where\none doesn't occur.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eGiven a custom term list, the \u003ccode\u003eSentencesForTopicModeling.get_topics_from_terms\u003c/code\u003e will\ngenerate a series of topics. Note that the dense rank difference (\u003ccode\u003eRankDifference\u003c/code\u003e) works\nparticularly well for this task, and is the default parameter.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"term_list = ['obama', 'romney', 'democrats', 'republicans', 'health', 'military', 'taxes',\n 'education', 'olympics', 'auto', 'iraq', 'iran', 'israel']\n\nunigram_corpus = (st.CorpusFromParsedDocuments(convention_df,\n category_col='party',\n parsed_col='parse')\n .build().get_stoplisted_unigram_corpus())\n\ntopic_model = (st.SentencesForTopicModeling(unigram_corpus)\n .get_topics_from_terms(term_list,\n scorer=st.RankDifference(),\n num_terms_per_topic=20))\n\ntopic_feature_builder = st.FeatsFromTopicModel(topic_model)\n# The remaining code is identical to two examples above. 
See demo_word_list_topic_model.py\n# for the complete example.\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003eterm_list\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e [\u003cspan class=\"pl-s\"\u003e'obama'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'romney'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'democrats'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'republicans'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'health'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'military'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'taxes'\u003c/span\u003e,\n \u003cspan class=\"pl-s\"\u003e'education'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'olympics'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'auto'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'iraq'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'iran'\u003c/span\u003e, \u003cspan class=\"pl-s\"\u003e'israel'\u003c/span\u003e]\n\n\u003cspan class=\"pl-s1\"\u003eunigram_corpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_stoplisted_unigram_corpus\u003c/span\u003e())\n\n\u003cspan class=\"pl-s1\"\u003etopic_model\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003eSentencesForTopicModeling\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eunigram_corpus\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003eget_topics_from_terms\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003eterm_list\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003escorer\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eRankDifference\u003c/span\u003e(),\n \u003cspan class=\"pl-s1\"\u003enum_terms_per_topic\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e20\u003c/span\u003e))\n\n\u003cspan class=\"pl-s1\"\u003etopic_feature_builder\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eFeatsFromTopicModel\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003etopic_model\u003c/span\u003e)\n\u003cspan class=\"pl-c\"\u003e# The remaining code is identical to two examples above. 
See demo_word_list_topic_model.py\u003c/span\u003e\n\u003cspan class=\"pl-c\"\u003e# for the complete example.\u003c/span\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_word_list_topic_model.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/e913957605561125d453405dbb5fea90e88d29e942e8632bd5deb668fb3d46d1/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f776f72645f6c6973745f746f7069635f6d6f64656c2e706e67\" alt=\"demo_word_list_topic_model.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_word_list_topic_model.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eCreating T-SNE-style word embedding projection plots\u003c/h3\u003e\u003ca id=\"user-content-creating-t-sne-style-word-embedding-projection-plots\" class=\"anchor\" aria-label=\"Permalink: Creating T-SNE-style word embedding projection plots\" href=\"#creating-t-sne-style-word-embedding-projection-plots\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eScattertext makes it easy to create word-similarity plots using projections of 
word embeddings as the x and y-axes.\nIn the example below, we create a stop-listed Corpus with only unigram terms. The \u003ccode\u003eproduce_projection_explorer\u003c/code\u003e function\nuses Gensim to create word embeddings and then projects them to two dimensions using Uniform Manifold Approximation\nand Projection (UMAP).\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eUMAP is chosen over T-SNE because it can employ the cosine similarity between two word vectors instead of just the\neuclidean distance.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"convention_df = st.SampleCorpora.ConventionData2012.get_data()\nconvention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)\n\ncorpus = (st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse')\n .build().get_stoplisted_unigram_corpus())\n\nhtml = st.produce_projection_explorer(corpus, category='democrat', category_name='Democratic',\n not_category_name='Republican', metadata=convention_df.speaker)\"\u003e\u003cpre\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eSampleCorpora\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eConventionData2012\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eget_data\u003c/span\u003e()\n\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e] \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e[\u003cspan class=\"pl-s\"\u003e'text'\u003c/span\u003e].\u003cspan class=\"pl-c1\"\u003eapply\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan 
class=\"pl-c1\"\u003ewhitespace_nlp_with_sentences\u003c/span\u003e)\n\n\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e (\u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eCorpusFromParsedDocuments\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'party'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eparsed_col\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'parse'\u003c/span\u003e)\n .\u003cspan class=\"pl-c1\"\u003ebuild\u003c/span\u003e().\u003cspan class=\"pl-c1\"\u003eget_stoplisted_unigram_corpus\u003c/span\u003e())\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_projection_explorer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003espeaker\u003c/span\u003e)\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIn order to use custom word embedding functions or 
projection functions, pass models into the \u003ccode\u003eword2vec_model\u003c/code\u003e\nand \u003ccode\u003eprojection_model\u003c/code\u003e parameters. In order to use T-SNE, for example, use\n\u003ccode\u003eprojection_model=sklearn.manifold.TSNE()\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"highlight highlight-source-python notranslate position-relative overflow-auto\" dir=\"auto\" data-snippet-clipboard-copy-content=\"import umap\nfrom gensim.models.word2vec import Word2Vec\n\nhtml = st.produce_projection_explorer(corpus,\n word2vec_model=Word2Vec(size=100, window=5, min_count=10, workers=4),\n projection_model=umap.UMAP(min_dist=0.5, metric='cosine'),\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n metadata=convention_df.speaker) \"\u003e\u003cpre\u003e\u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003eumap\u003c/span\u003e\n\u003cspan class=\"pl-k\"\u003efrom\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003egensim\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003emodels\u003c/span\u003e.\u003cspan class=\"pl-s1\"\u003eword2vec\u003c/span\u003e \u003cspan class=\"pl-k\"\u003eimport\u003c/span\u003e \u003cspan class=\"pl-v\"\u003eWord2Vec\u003c/span\u003e\n\n\u003cspan class=\"pl-s1\"\u003ehtml\u003c/span\u003e \u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e \u003cspan class=\"pl-s1\"\u003est\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eproduce_projection_explorer\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003ecorpus\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003eword2vec_model\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-en\"\u003eWord2Vec\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003esize\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e100\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003ewindow\u003c/span\u003e\u003cspan 
class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e5\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003emin_count\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e10\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003eworkers\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e4\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003eprojection_model\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003eumap\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003eUMAP\u003c/span\u003e(\u003cspan class=\"pl-s1\"\u003emin_dist\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e0.5\u003c/span\u003e, \u003cspan class=\"pl-s1\"\u003emetric\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'cosine'\u003c/span\u003e),\n \u003cspan class=\"pl-s1\"\u003ecategory\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'democrat'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003ecategory_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Democratic'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003enot_category_name\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s\"\u003e'Republican'\u003c/span\u003e,\n \u003cspan class=\"pl-s1\"\u003emetadata\u003c/span\u003e\u003cspan class=\"pl-c1\"\u003e=\u003c/span\u003e\u003cspan class=\"pl-s1\"\u003econvention_df\u003c/span\u003e.\u003cspan class=\"pl-c1\"\u003espeaker\u003c/span\u003e) \u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/demo_tsne_style.html\" rel=\"nofollow\"\u003e\u003cimg 
src=\"https://camo.githubusercontent.com/e1ef8a377a9b14fba6e0094d2c518ed479d4db638204c11902456c430b66149c/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f74736e655f7374796c652e706e67\" alt=\"t-sne style plot\" data-canonical-src=\"https://jasonkessler.github.io/demo_tsne_style.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eUsing SVD to visualize any kind of word embeddings\u003c/h3\u003e\u003ca id=\"user-content-using-svd-to-visualize-any-kind-of-word-embeddings\" class=\"anchor\" aria-label=\"Permalink: Using SVD to visualize any kind of word embeddings\" href=\"#using-svd-to-visualize-any-kind-of-word-embeddings\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTerm positions can also be determined by the positions of terms according to the output of principal component analysis,\nand \u003ccode\u003eproduce_projection_explorer\u003c/code\u003e also supports this functionality. 
We'll look at how axes transformations (\"scalers\"\nin Scattertext terminology) can make it easier to inspect the output of PCA.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe'll use the 2012 Conventions corpus for these visualizations. Only unigrams occurring in at least three documents\nwill be considered.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; convention_df = st.SampleCorpora.ConventionData2012.get_data()\n\u0026gt;\u0026gt;\u0026gt; convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)\n\u0026gt;\u0026gt;\u0026gt; corpus = (st.CorpusFromParsedDocuments(convention_df,\n... category_col='party',\n... parsed_col='parse')\n... .build()\n... .get_stoplisted_unigram_corpus()\n... .remove_infrequent_words(minimum_term_count=3, term_ranker=st.OncePerDocFrequencyRanker))\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; convention_df = st.SampleCorpora.ConventionData2012.get_data()\n\u0026gt;\u0026gt;\u0026gt; convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences)\n\u0026gt;\u0026gt;\u0026gt; corpus = (st.CorpusFromParsedDocuments(convention_df,\n... category_col='party',\n... parsed_col='parse')\n... .build()\n... .get_stoplisted_unigram_corpus()\n... .remove_infrequent_words(minimum_term_count=3, term_ranker=st.OncePerDocFrequencyRanker))\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNext, we use scikit-learn's tf-idf transformer to find very simple, sparse embeddings for all of these words. 
Since\nwe input a #docs x #terms matrix to the transformer, we can transpose it to get a proper term-embeddings matrix, where\neach row\ncorresponds to a term, and the columns correspond to document-specific tf-idf scores.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; from sklearn.feature_extraction.text import TfidfTransformer\n\u0026gt;\u0026gt;\u0026gt; embeddings = TfidfTransformer().fit_transform(corpus.get_term_doc_mat())\n\u0026gt;\u0026gt;\u0026gt; embeddings.shape\n(189, 2159)\n\u0026gt;\u0026gt;\u0026gt; corpus.get_num_docs(), corpus.get_num_terms()\n(189, 2159) \n\u0026gt;\u0026gt;\u0026gt; embeddings = embeddings.T\n\u0026gt;\u0026gt;\u0026gt; embeddings.shape\n(2159, 189)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; from sklearn.feature_extraction.text import TfidfTransformer\n\u0026gt;\u0026gt;\u0026gt; embeddings = TfidfTransformer().fit_transform(corpus.get_term_doc_mat())\n\u0026gt;\u0026gt;\u0026gt; embeddings.shape\n(189, 2159)\n\u0026gt;\u0026gt;\u0026gt; corpus.get_num_docs(), corpus.get_num_terms()\n(189, 2159) \n\u0026gt;\u0026gt;\u0026gt; embeddings = embeddings.T\n\u0026gt;\u0026gt;\u0026gt; embeddings.shape\n(2159, 189)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eGiven these sparse embeddings, we can apply sparse singular value decomposition to extract three factors. SVD\nfactorizes the term embeddings matrix into three matrices, U, Σ, and VT. 
Importantly, the matrix U provides the singular\nvalues\nfor each term, and VT provides them for each document, and Σ is a vector of the singular values.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; from scipy.sparse.linalg import svds\n\u0026gt;\u0026gt;\u0026gt; U, S, VT = svds(embeddings, k = 3, maxiter=20000, which='LM')\n\u0026gt;\u0026gt;\u0026gt; U.shape\n(2159, 3)\n\u0026gt;\u0026gt;\u0026gt; S.shape\n(3,)\n\u0026gt;\u0026gt;\u0026gt; VT.shape\n(3, 189)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; from scipy.sparse.linalg import svds\n\u0026gt;\u0026gt;\u0026gt; U, S, VT = svds(embeddings, k = 3, maxiter=20000, which='LM')\n\u0026gt;\u0026gt;\u0026gt; U.shape\n(2159, 3)\n\u0026gt;\u0026gt;\u0026gt; S.shape\n(3,)\n\u0026gt;\u0026gt;\u0026gt; VT.shape\n(3, 189)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe'll look at the first two singular values, plotting each term such that the x-axis position is the first singular\nvalue, and the y-axis term is the second. To do this, we make a \"projection\" data frame, where the \u003ccode\u003ex\u003c/code\u003e and \u003ccode\u003ey\u003c/code\u003e\ncolumns store the first two singular values, and key the data frame on each term. This controls the term positions\non the chart.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; x_dim = 0; y_dim = 1;\n\u0026gt;\u0026gt;\u0026gt; projection = pd.DataFrame({'term':corpus.get_terms(),\n... 'x':U.T[x_dim],\n... 
'y':U.T[y_dim]}).set_index('term')\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; x_dim = 0; y_dim = 1;\n\u0026gt;\u0026gt;\u0026gt; projection = pd.DataFrame({'term':corpus.get_terms(),\n... 'x':U.T[x_dim],\n... 'y':U.T[y_dim]}).set_index('term')\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eWe'll use the \u003ccode\u003eproduce_pca_explorer\u003c/code\u003e function to visualize these. Note we include the projection object, and specify\nwhich singular values were used for x and y (\u003ccode\u003ex_dim\u003c/code\u003e and \u003ccode\u003ey_dim\u003c/code\u003e) so they can be labeled in the interactive\nvisualization.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"html = st.produce_pca_explorer(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n projection=projection,\n metadata=convention_df['speaker'],\n width_in_pixels=1000,\n x_dim=x_dim,\n y_dim=y_dim)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003ehtml = st.produce_pca_explorer(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n projection=projection,\n metadata=convention_df['speaker'],\n width_in_pixels=1000,\n x_dim=x_dim,\n y_dim=y_dim)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eClick for an interactive visualization.\u003cbr\u003e\n\u003ca href=\"https://jasonkessler.github.io/demo_embeddings_svd_0_1.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/b1112c4d589e3e1d2729b3c8f98b16921fb3e60c0fe850ee3946b46f737f27b9/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f737664312e706e67\" alt=\"pca\" data-canonical-src=\"https://jasonkessler.github.io/svd1.png\" style=\"max-width: 
100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eWe can easily re-scale the plot in order to make more efficient use of space. For example, passing in\n\u003ccode\u003escaler=scale_neg_1_to_1_with_zero_mean\u003c/code\u003e will make all four quadrants take equal area.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"html = st.produce_pca_explorer(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n projection=projection,\n metadata=convention_df['speaker'],\n width_in_pixels=1000,\n scaler=st.scale_neg_1_to_1_with_zero_mean,\n x_dim=x_dim,\n y_dim=y_dim)\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003ehtml = st.produce_pca_explorer(corpus,\n category='democrat',\n category_name='Democratic',\n not_category_name='Republican',\n projection=projection,\n metadata=convention_df['speaker'],\n width_in_pixels=1000,\n scaler=st.scale_neg_1_to_1_with_zero_mean,\n x_dim=x_dim,\n y_dim=y_dim)\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eClick for an interactive visualization.\u003cbr\u003e\n\u003ca href=\"https://jasonkessler.github.io/demo_embeddings_svd_0_1_scale_neg_1_to_1_with_zero_mean.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/6f7a8b248e2e50203363f75fa8e33794f4677c38f18fe42a2d093a2155fc0750/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f737664322e706e67\" alt=\"pca\" data-canonical-src=\"https://jasonkessler.github.io/svd2.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eExporting plot to matplotlib\u003c/h3\u003e\u003ca id=\"user-content-exporting-plot-to-matplotlib\" class=\"anchor\" aria-label=\"Permalink: Exporting plot to matplotlib\" 
href=\"#exporting-plot-to-matplotlib\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eTo export the content of a scattertext explorer object (ScattertextStructure) to matplotlib you can\nuse \u003ccode\u003eproduce_scattertext_pyplot\u003c/code\u003e. The function returns a \u003ccode\u003ematplotlib.figure.Figure\u003c/code\u003e object which can be visualized\nusing \u003ccode\u003eplt.show\u003c/code\u003e or \u003ccode\u003eplt.savefig\u003c/code\u003e as in the example below.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote that installation of textalloc==0.0.3 and matplotlib\u0026gt;=3.6.0 is required before running this.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"convention_df = st.SampleCorpora.ConventionData2012.get_data().assign(\n\tparse = lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n)\ncorpus = st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse').build()\nscattertext_structure = 
st.produce_scattertext_explorer(\n\tcorpus,\n\tcategory='democrat',\n\tcategory_name='Democratic',\n\tnot_category_name='Republican',\n\tminimum_term_frequency=5,\n\tpmi_threshold_coefficient=8,\n\twidth_in_pixels=1000,\n\treturn_scatterplot_structure=True,\n)\nfig = st.produce_scattertext_pyplot(scattertext_structure)\nfig.savefig('pyplot_export.png', format='png')\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003econvention_df = st.SampleCorpora.ConventionData2012.get_data().assign(\n\tparse = lambda df: df.text.apply(st.whitespace_nlp_with_sentences)\n)\ncorpus = st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse').build()\nscattertext_structure = st.produce_scattertext_explorer(\n\tcorpus,\n\tcategory='democrat',\n\tcategory_name='Democratic',\n\tnot_category_name='Republican',\n\tminimum_term_frequency=5,\n\tpmi_threshold_coefficient=8,\n\twidth_in_pixels=1000,\n\treturn_scatterplot_structure=True,\n)\nfig = st.produce_scattertext_pyplot(scattertext_structure)\nfig.savefig('pyplot_export.png', format='png')\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e[\u003ca target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"https://camo.githubusercontent.com/0e6b7f37dbfe4f4e50ace8cf8a9a759caf0fafb44665035c7163e03e1148f8e8/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f7079706c6f745f6578706f72742e706e67\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/0e6b7f37dbfe4f4e50ace8cf8a9a759caf0fafb44665035c7163e03e1148f8e8/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f7079706c6f745f6578706f72742e706e67\" alt=\"pyplot\" data-canonical-src=\"https://jasonkessler.github.io/pyplot_export.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e]\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eExamples\u003c/h2\u003e\u003ca id=\"user-content-examples\" 
class=\"anchor\" aria-label=\"Permalink: Examples\" href=\"#examples\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003ePlease see the examples in the \u003ca href=\"https://github.com/JasonKessler/Scattertext-PyData\"\u003ePyData 2017 Tutorial\u003c/a\u003e on\nScattertext.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eA note on chart layout\u003c/h2\u003e\u003ca id=\"user-content-a-note-on-chart-layout\" class=\"anchor\" aria-label=\"Permalink: A note on chart layout\" href=\"#a-note-on-chart-layout\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://github.com/uwplse/cozy\"\u003eCozy: The Collection Synthesizer\u003c/a\u003e (Loncaric 2016) was used to help determine\nwhich terms could be labeled without overlapping a circle or another label. It automatically built a data structure to\nefficiently store and query the locations of each circle and labeled term.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe script to build \u003ccode\u003erectangle-holder.js\u003c/code\u003e was\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"fields ax1 : long, ay1 : long, ax2 : long, ay2 : long\nassume ax1 \u0026lt; ax2 and ay1 \u0026lt; ay2\nquery findMatchingRectangles(bx1 : long, by1 : long, bx2 : long, by2 : long)\n assume bx1 \u0026lt; bx2 and by1 \u0026lt; by2\n ax1 \u0026lt; bx2 and ax2 \u0026gt; bx1 and ay1 \u0026lt; by2 and ay2 \u0026gt; by1\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003efields ax1 : long, ay1 : long, ax2 : long, ay2 : long\nassume ax1 \u0026lt; ax2 and ay1 \u0026lt; ay2\nquery findMatchingRectangles(bx1 : long, by1 : long, bx2 : long, by2 : long)\n assume bx1 \u0026lt; bx2 and by1 \u0026lt; by2\n ax1 \u0026lt; bx2 and ax2 \u0026gt; bx1 and ay1 \u0026lt; by2 and ay2 \u0026gt; by1\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAnd it was called using\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"$ python2.7 src/main.py \u0026lt;script file name\u0026gt; --enable-volume-trees \\\n --js-class RectangleHolder --enable-hamt --enable-arrays --js rectangle_holder.js\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003e$ python2.7 src/main.py \u0026lt;script file name\u0026gt; --enable-volume-trees \\\n --js-class RectangleHolder --enable-hamt 
--enable-arrays --js rectangle_holder.js\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eWhat's new\u003c/h2\u003e\u003ca id=\"user-content-whats-new\" class=\"anchor\" aria-label=\"Permalink: What's new\" href=\"#whats-new\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.64\u003c/h2\u003e\u003ca id=\"user-content-00264\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.64\" href=\"#00264\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 
2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdding in code to ensure that term statistics will show up even if no documents are present in visualization.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.60\u003c/h2\u003e\u003ca id=\"user-content-00260\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.60\" href=\"#00260\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBetter axis labeling (see demo_axis_crossbars_and_labels.py).\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.59\u003c/h2\u003e\u003ca id=\"user-content-00259\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.59\" href=\"#00259\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 
1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003ePytextrank compatibility\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.57-58\u003c/h2\u003e\u003ca id=\"user-content-00257-58\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.57-58\" href=\"#00257-58\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eEnsuring Pandas 1.0 compatibility fixing Issue #51 and scikit-learn stopwords import issue in #49.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.44:\u003c/h2\u003e\u003ca id=\"user-content-00244\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.44:\" href=\"#00244\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 
3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eAdded the following classes to support rank-based feature-selection: \u003ccode\u003eAssociationCompactorByRank\u003c/code\u003e,\n\u003ccode\u003eTermCategoryRanker\u003c/code\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.43:\u003c/h2\u003e\u003ca id=\"user-content-00243\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.43:\" href=\"#00243\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eMade the term pop-up box on the category pairplot only the category name\u003c/li\u003e\n\u003cli\u003eFixed optimal projection 
search function\u003c/li\u003e\n\u003cli\u003eMerged PR from @millengustavo to fix when a FutureWarning is issued every time the get_background_frequency_df\nis called.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.42:\u003c/h2\u003e\u003ca id=\"user-content-00242\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.42:\" href=\"#00242\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eFixed clickability of terms, coloring in certain plots\u003c/li\u003e\n\u003cli\u003eAdded initial number of terms to show in pairplot, using the \u003ccode\u003eterms_to_show\u003c/code\u003e parameter\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.41:\u003c/h2\u003e\u003ca id=\"user-content-00241\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.41:\" href=\"#00241\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 
1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eEnabled changing protocol in pair plot\u003c/li\u003e\n\u003cli\u003eFixed semiotic square creator\u003c/li\u003e\n\u003cli\u003eAdded \u003ccode\u003euse_categories_as_metadata_and_replace_terms\u003c/code\u003e to \u003ccode\u003eTermDocMatrix\u003c/code\u003e.\u003c/li\u003e\n\u003cli\u003eAdded \u003ccode\u003eget_metadata_doc_count_df\u003c/code\u003e and \u003ccode\u003eget_metadata_count_mat\u003c/code\u003e to TermDocMatrix\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.40:\u003c/h2\u003e\u003ca id=\"user-content-00240\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.40:\" href=\"#00240\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul 
dir=\"auto\"\u003e\n\u003cli\u003eAdded categories to terms in pair plot halo, made them clickable\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.39:\u003c/h2\u003e\u003ca id=\"user-content-00239\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.39:\" href=\"#00239\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eFixing failing test case\u003c/li\u003e\n\u003cli\u003eAdding halo to pair plot\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.38:\u003c/h2\u003e\u003ca id=\"user-content-00238\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.38:\" href=\"#00238\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 
1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eFixed term preview/clickability in semiotic square plots\u003c/li\u003e\n\u003cli\u003eFixed search box\u003c/li\u003e\n\u003cli\u003eAdded preliminary \u003ccode\u003eproduce_pairplot\u003c/code\u003e\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.37:\u003c/h2\u003e\u003ca id=\"user-content-00237\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.37:\" href=\"#00237\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eJavascript changes to support multiple plots on a single page.\u003c/li\u003e\n\u003cli\u003eAdded \u003ccode\u003eScatterChart.hide_terms(terms: iter[str])\u003c/code\u003e which enables selected terms to be hidden from the chart.\u003c/li\u003e\n\u003cli\u003eAdded \u003ccode\u003eScatterChartData.score_transform\u003c/code\u003e to specify the function which can change an original score into 
a value\nbetween 0 and 1 used for term coloring.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.36:\u003c/h2\u003e\u003ca id=\"user-content-00236\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.36:\" href=\"#00236\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eAdded \u003ccode\u003ealternative_term_func\u003c/code\u003e to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e which allows you to inject a function that activates\nwhen a term is clicked.\u003c/li\u003e\n\u003cli\u003eFixed Cohen's d calculation, and added \u003ccode\u003eHedgesG\u003c/code\u003e, an unbiased version of Cohen's d which is a subclass of \u003ccode\u003eCohensD\u003c/code\u003e.\u003c/li\u003e\n\u003cli\u003eAdded the \u003ccode\u003efrequency_transform\u003c/code\u003e parameter to \u003ccode\u003eproduce_frequency_explorer\u003c/code\u003e. 
This defaults to a log transform, but\nallows you to use any way your heart desires to order terms along the x-axis.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.35:\u003c/h2\u003e\u003ca id=\"user-content-00235\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.35:\" href=\"#00235\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003eAdded \u003ccode\u003eshow_category_headings=True\u003c/code\u003e to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e. Setting this to False suppresses the list of\ncategories\nwhich will be displayed in the term context area.\u003c/li\u003e\n\u003cli\u003eAdded \u003ccode\u003ediv_name\u003c/code\u003e argument to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e and name-spaced important divs and classes by \u003ccode\u003ediv_name\u003c/code\u003e\nin HTML templates and Javascript.\u003c/li\u003e\n\u003cli\u003eAdded \u003ccode\u003eshow_cross_axes=True\u003c/code\u003e to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e. 
Setting this to \u003ccode\u003eFalse\u003c/code\u003e prevents the cross axes\nfrom being displayed if \u003ccode\u003eshow_axes\u003c/code\u003e is \u003ccode\u003eTrue\u003c/code\u003e.\u003c/li\u003e\n\u003cli\u003eChanged default scorer to RankDifference.\u003c/li\u003e\n\u003cli\u003eMade sure that term contexts were properly shown in all configurations.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.34:\u003c/h2\u003e\u003ca id=\"user-content-00234\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.34:\" href=\"#00234\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ccode\u003eTermDocMatrix.get_metadata_freq_df\u003c/code\u003e now accepts the \u003ccode\u003elabel_append\u003c/code\u003e argument which by default adds \u003ccode\u003e' freq'\u003c/code\u003e to the\nend of each column.\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eTermDocMatrix.get_num_cateogires\u003c/code\u003e returns the number of categories in a term-document matrix.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" 
dir=\"auto\"\u003e0.0.2.33:\u003c/h2\u003e\u003ca id=\"user-content-00233\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.33:\" href=\"#00233\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded the following methods:\u003c/p\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e\u003ccode\u003eTermDocMatrixWithoutCategories.get_num_metadata\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eTermDocMatrix.use_metadata_as_categories\u003c/code\u003e\u003c/li\u003e\n\u003cli\u003e\u003ccode\u003eunified_context\u003c/code\u003e argument in \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e lists all contexts in a single column. This lets\nyou see snippets organized by multiple categories in a single column. 
See \u003ccode\u003edemo_unified_context.py\u003c/code\u003e for an example.\u003cbr\u003e\nhelps category-free or multi-category analyses.\u003c/li\u003e\n\u003c/ul\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.32\u003c/h2\u003e\u003ca id=\"user-content-00232\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.32\" href=\"#00232\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded a series of objects to handle uncategorized corpora. 
Added section on\n\u003ca href=\"#document-based-scatterplots\"\u003eDocument-Based Scatterplots\u003c/a\u003e, and the add_doc_names_as_metadata function.\n\u003ccode\u003eCategoryColorAssigner\u003c/code\u003e was also added to assign colors to qualitative categories.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.28-31\u003c/h2\u003e\u003ca id=\"user-content-0028-31\" class=\"anchor\" aria-label=\"Permalink: 0.0.28-31\" href=\"#0028-31\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eA number of new term scoring approaches including \u003ccode\u003eRelativeEntropy\u003c/code\u003e (a direct implementation of Frankhauser et al. (\n2014)), and\n\u003ccode\u003eZScores\u003c/code\u003e, an implementation of the Z-Score model used in Frankhauser et al.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ccode\u003eTermDocMatrix.get_metadata_freq_df()\u003c/code\u003e returns a metadata-doc corpus.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ccode\u003eCorpusBasedTermScorer.set_ranker\u003c/code\u003e allows you to use a different term ranker when finding corpus-based scores. 
This not\nonly\nlets these scorers with metadata, but also allows you to integrate once-per-document counts.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed \u003ccode\u003eproduce_projection_explorer\u003c/code\u003e such that it can work with a predefined set of term embeddings. This can allow,\nfor example, the easy exploration of one hot-encoded term embeddings in addition to\narbitrary lower-dimensional embeddings.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eadd_metadata\u003c/code\u003e to \u003ccode\u003eTermDocMatrix\u003c/code\u003e in order to inject meta data after a TermDocMatrix object\nhas been created.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eMade sure tooltip never started above the top of the web page.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.28\u003c/h3\u003e\u003ca id=\"user-content-00228\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.28\" href=\"#00228\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eDomainCompactor\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" 
dir=\"auto\"\u003e0.0.2.26-27.1\u003c/h3\u003e\u003ca id=\"user-content-00226-271\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.26-27.1\" href=\"#00226-271\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed bug \u003ca href=\"https://github.com/JasonKessler/scattertext/issues/31\" data-hovercard-type=\"issue\" data-hovercard-url=\"/JasonKessler/scattertext/issues/31/hovercard\"\u003e#31\u003c/a\u003e, enabling context to show when metadata value is\nclicked.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eEnabled display of terms in topic models in explorer, along with the display of\ncustomized topic models. 
Please see \u003ca href=\"#visualizing-topic-models\"\u003eVisualizing topic models\u003c/a\u003e for an\noverview of the additions.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eRemoved pkg_resources from Phrasemachine, corrected demo_phrase_machine.py\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNow compatible with Gensim 3.4.0.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded characteristic explorer, \u003ccode\u003eproduce_characteristic_explorer\u003c/code\u003e, to plot terms with their characteristic scores on\nthe x-axis and their class-association scores on the y-axis.\nSee \u003ca href=\"#ordering-terms-by-corpus-characteristicness\"\u003eOrdering Terms by Corpus Characteristicness\u003c/a\u003e for more details.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.24-25\u003c/h3\u003e\u003ca id=\"user-content-00224-25\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.24-25\" href=\"#00224-25\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eTermCategoryFrequencies\u003c/code\u003e in response to Issue 23. 
Please\nsee \u003ca href=\"#visualizing-differences-based-on-only-term-frequencies\"\u003eVisualizing differences based on only term frequencies\u003c/a\u003e\nfor more details.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003ex_axis_labels\u003c/code\u003e and \u003ccode\u003ey_axis_labels\u003c/code\u003e parameters to \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e.\nThese let you include evenly-spaced string axis labels on the chart, as opposed to just\n\"Low\", \"Medium\" and \"High\". These rely on d3's ticks function, which can behave\nunpredictably. Caveat usor.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.16-23.1\u003c/h3\u003e\u003ca id=\"user-content-00216-231\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.16-23.1\" href=\"#00216-231\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eSemiotic Squares now look better, and have customizable labels.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIncorporated the \u003ca href=\"http://www.wjh.harvard.edu/~inquirer/homecat.htm\" rel=\"nofollow\"\u003eGeneral Inquirer\u003c/a\u003e\nlexicon. For non-commercial use only. 
The lexicon is downloaded from their homepage at the start of each\nuse. See \u003ccode\u003edemo_general_inquierer.py\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIncorporated Phrasemachine from \u003ca href=\"https://github.com/AbeHandler\"\u003eAbeHandler\u003c/a\u003e (Handler et al. 2016). For the license,\nplease see \u003ccode\u003ePhraseMachineLicense.txt\u003c/code\u003e. For an example, please see \u003ccode\u003edemo_phrase_machine.py\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eCompactTerms\u003c/code\u003e for removing redundant and infrequent terms from term document matrices.\nThese occur if a word or phrase is always part of a larger phrase; the shorter phrase is\nconsidered redundant and removed from the corpus. See \u003ccode\u003edemo_phrase_machine.py\u003c/code\u003e for an example.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eFourSquare\u003c/code\u003e, a pattern that allows for the creation of a semiotic square with\nseparate categories for each corner. Please see \u003ccode\u003edemo_four_square.py\u003c/code\u003e for an early example.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFinally, added a way to easily perform T-SNE-style visualizations on a categorized corpus. This uses, by default,\nthe \u003ca href=\"https://github.com/lmcinnes/umap\"\u003eumap-learn\u003c/a\u003e package. Please see demo_tsne_style.py.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed to \u003ccode\u003eScaledFScorePresets(one_to_neg_one=True)\u003c/code\u003e, added \u003ccode\u003eUnigramsFromSpacyDoc\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNow, when using \u003ccode\u003eCorpusFromPandas\u003c/code\u003e, a \u003ccode\u003eCorpusDF\u003c/code\u003e object is returned, instead of a \u003ccode\u003eCorpus\u003c/code\u003e object. 
This new type of\nobject\nkeeps a reference to the source data frame, and returns it via the \u003ccode\u003eCorpusDF.get_df()\u003c/code\u003e method.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe factory \u003ccode\u003eCorpusFromFeatureDict\u003c/code\u003e was added. It allows you to directly specify term counts and\nmetadata item counts within the dataframe. Please see \u003ccode\u003etest_corpusFromFeatureDict.py\u003c/code\u003e for an example.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.15-16\u003c/h3\u003e\u003ca id=\"user-content-00215-16\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.15-16\" href=\"#00215-16\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded a very semiotic square creator.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThe idea is to build a semiotic square that contrasts two categories in a Term Document Matrix\nwhile using other categories as neutral categories.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSee \u003ca href=\"#creating-semiotic-squares\"\u003eCreating semiotic squares\u003c/a\u003e for an overview on how to\nuse this functionality and semiotic squares.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded a parameter to 
disable the display of the top-terms sidebar, e.g.,\n\u003ccode\u003eproduce_scattertext_explorer(..., show_top_terms=False, ...)\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAn interface to part of the subjectivity/sentiment dataset from\nBo Pang and Lillian Lee. ``A Sentimental Education: Sentiment Analysis Using Subjectivity Summarization\nBased on Minimum Cuts''. ACL. 2004. See \u003ccode\u003eSampleCorpora.RottenTomatoes\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed bug that caused tooltip placement to be off after scrolling.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eMade \u003ccode\u003ecategory_name\u003c/code\u003e and \u003ccode\u003enot_category_name\u003c/code\u003e optional in \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e etc.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCreated the ability to customize tooltips via the \u003ccode\u003eget_tooltip_content\u003c/code\u003e argument to\n\u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e etc., control axes labels via \u003ccode\u003ex_axis_values\u003c/code\u003e\nand \u003ccode\u003ey_axis_values\u003c/code\u003e. The \u003ccode\u003ecolor_func\u003c/code\u003e parameter is a Javascript function to control color of a point. 
Function takes a\nparameter\nwhich is a dictionary entry produced by \u003ccode\u003eScatterChartExplorer.to_dict\u003c/code\u003e and returns a string.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.14\u003c/h3\u003e\u003ca id=\"user-content-00214\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.14\" href=\"#00214\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIntegration with Scikit-Learn's text-analysis pipeline led to the creation of the\n\u003ccode\u003eCorpusFromScikit\u003c/code\u003e and \u003ccode\u003eTermDocMatrixFromScikit\u003c/code\u003e classes.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded the \u003ccode\u003eAutoTermSelector\u003c/code\u003e class to automatically suggest terms to appear in the visualization.\u003cbr\u003e\nThis can make it easier to show large data sets, and remove fiddling with the various\nminimum term frequency parameters.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFor an example of how to use \u003ccode\u003eCorpusFromScikit\u003c/code\u003e and \u003ccode\u003eAutoTermSelector\u003c/code\u003e, please see \u003ccode\u003edemo_sklearn.py\u003c/code\u003e\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAlso, I updated the library and 
examples to be compatible with spaCy 2.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed bug when processing single-word documents, and set the default beta to 2.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.11-13\u003c/h3\u003e\u003ca id=\"user-content-00211-13\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.11-13\" href=\"#00211-13\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eproduce_frequency_explorer\u003c/code\u003e function, and adding the PEP 369-compliant\n\u003ccode\u003e__version__\u003c/code\u003e attribute as mentioned in \u003ca href=\"https://github.com/JasonKessler/scattertext/issues/19\" data-hovercard-type=\"issue\" data-hovercard-url=\"/JasonKessler/scattertext/issues/19/hovercard\"\u003e#19\u003c/a\u003e.\nFixed bug when creating visualizations with more than two possible categories. 
Now, by default,\ncategory names will not be title-cased in the visualization, but will retain their original case.\u003cbr\u003e\nIf you'd still like to do this, use \u003ccode\u003eScatterChart (or a descendant).to_dict(..., title_case_names=True)\u003c/code\u003e.\nFixed \u003ccode\u003eDocsAndLabelsFromCorpus\u003c/code\u003e for Py 2 compatibility.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.10\u003c/h3\u003e\u003ca id=\"user-content-00210\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.10\" href=\"#00210\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed bugs in \u003ccode\u003echinese_nlp\u003c/code\u003e when jieba has already been imported and in p-value\ncomputation when performing log-odds-ratio w/ prior scoring.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded demo for performing a Monroe et. 
al (2008) style visualization of\nlog-odds-ratio scores in \u003ccode\u003edemo_log_odds_ratio_prior.py\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.9.*\u003c/h3\u003e\u003ca id=\"user-content-0029\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.9.*\" href=\"#0029\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eBreaking change: \u003ccode\u003epmi_filter_thresold\u003c/code\u003e has been replaced with \u003ccode\u003epmi_threshold_coefficient\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded Emoji and Tweet analysis. 
See \u003ca href=\"#emoji-analysis\"\u003eEmoji analysis\u003c/a\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eCharacteristic terms fall back to \"Most frequent\" if no terms used in the chart are present\nin the background corpus.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed top-term calculation for custom scores.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSet scaled f-score's default beta to 0.5.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003e--spacy_language_model\u003c/code\u003e argument to the CLI.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded the \u003ccode\u003ealternative_text_field\u003c/code\u003e option in \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e to show an\nalternative text field when showing contexts in the interactive HTML visualization.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eUpdated \u003ccode\u003eParsedCorpus.get_unigram_corpus\u003c/code\u003e to allow for continued\n\u003ccode\u003ealternative_text_field\u003c/code\u003e functionality.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.8.6\u003c/h3\u003e\u003ca id=\"user-content-00286\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.8.6\" href=\"#00286\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded the ability for Scattertext to use noun chunks instead of unigrams and bigrams through the\n\u003ccode\u003eFeatsFromSpacyDocOnlyNounChunks\u003c/code\u003e class. In order to use it, run your favorite \u003ccode\u003eCorpus\u003c/code\u003e or\n\u003ccode\u003eTermDocMatrix\u003c/code\u003e factory, and pass in an instance of the class as a parameter:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"st.CorpusFromParsedDocuments(..., feats_from_spacy_doc=st.FeatsFromSpacyDocOnlyNounChunks())\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003est.CorpusFromParsedDocuments(..., feats_from_spacy_doc=st.FeatsFromSpacyDocOnlyNounChunks())\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed a bug in corpus construction that occurs when the last document has no features.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.8.5\u003c/h3\u003e\u003ca id=\"user-content-00285\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.8.5\" href=\"#00285\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eNow you don't have to install tinysegmenter to use Scattertext. But you need to\ninstall it if you want to parse Japanese. This caused a problem when Scattertext\nwas being installed on Windows.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.8.1-4\u003c/h3\u003e\u003ca id=\"user-content-00281-4\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.8.1-4\" href=\"#00281-4\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eTermDocMatrix.get_corner_score\u003c/code\u003e, giving an improved version of the\nRudder Score. Exposing \u003ccode\u003ewhitespace_nlp_with_sentences\u003c/code\u003e. It's a lightweight\nbad regex sentence splitter built atop a bad regex tokenizer that somewhat\napes spaCy's API. 
Use it if you don't have spaCy and the English model\ndownloaded or if you care more about memory footprint and speed than accuracy.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIt's not compatible with \u003ccode\u003eword_similarity_explorer\u003c/code\u003e but is compatible with\n\u003ccode\u003eword_similarity_explorer_gensim\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eTweaked scaled f-score normalization.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed Javascript bug when clicking on '$'.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.8.0\u003c/h3\u003e\u003ca id=\"user-content-00280\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.8.0\" href=\"#00280\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed bug in Scaled F-Score computations, and changed computation to better score words that are inversely correlated to\ncategory.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eWord2VecFromParsedCorpus\u003c/code\u003e to automate training Gensim word vectors from a corpus, and\u003cbr\u003e\n\u003ccode\u003eword_similarity_explorer_gensim\u003c/code\u003e to produce the visualization.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSee 
\u003ccode\u003edemo_gensim_similarity.py\u003c/code\u003e for an example.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.7.1\u003c/h3\u003e\u003ca id=\"user-content-00271\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.7.1\" href=\"#00271\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded the \u003ccode\u003ed3_url\u003c/code\u003e and \u003ccode\u003ed3_scale_chromatic_url\u003c/code\u003e parameters to\n\u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e. 
This provides a way to manually specify the paths to \"d3.js\"\n(i.e., the file from \"\u003ca href=\"https://cdnjs.cloudflare.com/ajax/libs/d3/4.6.0/d3.min.js\" rel=\"nofollow\"\u003ehttps://cdnjs.cloudflare.com/ajax/libs/d3/4.6.0/d3.min.js\u003c/a\u003e\") and\n\"d3-scale-chromatic.v1.js\" (i.e., the file from \"\u003ca href=\"https://d3js.org/d3-scale-chromatic.v1.min.js\" rel=\"nofollow\"\u003ehttps://d3js.org/d3-scale-chromatic.v1.min.js\u003c/a\u003e\").\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eThis is important if you're getting the error:\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"Javascript error adding output!\nTypeError: d3.scaleLinear is not a function\nSee your browser Javascript console for more details.\"\u003e\u003cpre class=\"notranslate\"\u003e\u003ccode\u003eJavascript error adding output!\nTypeError: d3.scaleLinear is not a function\nSee your browser Javascript console for more details.\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eIt also lets you use Scattertext if you're serving in an environment with no (or a restricted)\nexternal Internet connection.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFor example, if \"d3.min.js\" and \"d3-scale-chromatic.v1.min.js\" were present in the current\nworking directory, calling the following code would reference them locally instead of\nthe remote Javascript files. See \u003ca href=\"#visualizing-term-associations\"\u003eVisualizing term associations\u003c/a\u003e\nfor code context.\u003c/p\u003e\n\u003cdiv class=\"snippet-clipboard-content notranslate position-relative overflow-auto\" data-snippet-clipboard-copy-content=\"\u0026gt;\u0026gt;\u0026gt; html = st.produce_scattertext_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'],\n... 
d3_url='d3.min.js',\n... d3_scale_chromatic_url='d3-scale-chromatic.v1.min.js')\"\u003e\u003cpre lang=\"pydocstring\" class=\"notranslate\"\u003e\u003ccode\u003e\u0026gt;\u0026gt;\u0026gt; html = st.produce_scattertext_explorer(corpus,\n... category='democrat',\n... category_name='Democratic',\n... not_category_name='Republican',\n... width_in_pixels=1000,\n... metadata=convention_df['speaker'],\n... d3_url='d3.min.js',\n... d3_scale_chromatic_url='d3-scale-chromatic.v1.min.js')\n\u003c/code\u003e\u003c/pre\u003e\u003c/div\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.7.0\u003c/h3\u003e\u003ca id=\"user-content-00270\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.7.0\" href=\"#00270\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed a bug in 0.0.2.6.0 that transposed default axis labels.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded a Japanese mode to Scattertext. See \u003ccode\u003edemo_japanese.py\u003c/code\u003e for an example of\nhow to use Japanese. 
Please run \u003ccode\u003epip install tinysegmenter\u003c/code\u003e to parse Japanese.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAlso, the \u003ccode\u003echiense_mode\u003c/code\u003e boolean parameter in\n\u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e has been renamed to \u003ccode\u003easian_mode\u003c/code\u003e.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFor example, the output of \u003ccode\u003edemo_japanese.py\u003c/code\u003e is:\n\u003ca href=\"https://jasonkessler.github.io/demo_japanese.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/716b080413e9cf82b732f14a19e79a886dc1a47b4fc5f7a8a0619590a4a76707/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f6a6170616e6573652e706e67\" alt=\"demo_japanese.html\" data-canonical-src=\"https://jasonkessler.github.io/demo_japanese.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.6.0\u003c/h3\u003e\u003ca id=\"user-content-00260-1\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.6.0\" href=\"#00260-1\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCustom term positions and axis labels. 
Although not recommended, you can\nvisualize different metrics on each axis in visualizations similar to Monroe et al. (2008).\nPlease see \u003ca href=\"#custom-term-positions\"\u003eCustom term positions\u003c/a\u003e for more info.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.5.0\u003c/h3\u003e\u003ca id=\"user-content-00250\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.5.0\" href=\"#00250\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eEnhanced the visualization of query-based categorical differences, a.k.a the \u003ccode\u003eword_similarity_explorer\u003c/code\u003e\nfunction. When run, a plot is produced that contains category associated terms\ncolored in either red or blue hues, and terms not associated with either class\ncolored in greyscale and slightly smaller. The intensity of each color indicates\nassociation with the query term. 
For example:\u003c/p\u003e\n\u003cp dir=\"auto\"\u003e\u003ca href=\"https://jasonkessler.github.io/Convention-Visualization-Jobs.html\" rel=\"nofollow\"\u003e\u003cimg src=\"https://camo.githubusercontent.com/fddd4924eacbf55f3b6f43c1eef3f7a67732e2c45e78ad1915acba29b3b369ae/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e732d56697a2d4578706c616e6174696f6e2e706e67\" alt=\"Convention-Visualization-Jobs.html\" data-canonical-src=\"https://jasonkessler.github.io/Conventions-Viz-Explanation.png\" style=\"max-width: 100%;\"\u003e\u003c/a\u003e\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.4.6\u003c/h3\u003e\u003ca id=\"user-content-00246\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.4.6\" href=\"#00246\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eSome minor bug fixes, and added a \u003ccode\u003eminimum_not_category_term_frequency\u003c/code\u003e parameter. This fixes a problem with\nvisualizing imbalanced datasets. 
It sets a minimum number of times a word that does not appear in the target\ncategory must appear before it is displayed.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded \u003ccode\u003eTermDocMatrix.remove_entity_tags\u003c/code\u003e method to remove entity type tags\nfrom the analysis.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.4.5\u003c/h3\u003e\u003ca id=\"user-content-00245\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.4.5\" href=\"#00245\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed matched snippet not displaying issue \u003ca href=\"/JasonKessler/scattertext/issues/9\"\u003e#9\u003c/a\u003e, and fixed a Python 2 issue\nin creating a visualization using a \u003ccode\u003eParsedCorpus\u003c/code\u003e prepared via \u003ccode\u003eCorpusFromParsedDocuments\u003c/code\u003e, mentioned\nin the latter part of the issue \u003ca href=\"/JasonKessler/scattertext/issues/8\"\u003e#8\u003c/a\u003e discussion.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAgain, Python 2 is supported in experimental mode only.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" 
dir=\"auto\"\u003e0.0.2.4.4\u003c/h3\u003e\u003ca id=\"user-content-00244-1\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.4.4\" href=\"#00244-1\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eCorrected example links on this Readme.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed a bug in Issue 8 where the HTML visualization produced by \u003ccode\u003eproduce_scattertext_html\u003c/code\u003e would fail.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.4.2\u003c/h3\u003e\u003ca id=\"user-content-00242-1\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.4.2\" href=\"#00242-1\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 
1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed a couple issues that rendered Scattertext broken in Python 2. Chinese processing still does not work.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eNote: Use Python 3.4+ if you can.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.4.1\u003c/h3\u003e\u003ca id=\"user-content-00241-1\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.4.1\" href=\"#00241-1\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eFixed links in Readme, and made regex NLP available in CLI.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.4\u003c/h3\u003e\u003ca id=\"user-content-0024\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.4\" href=\"#0024\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 
1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded the command line tool, and fixed a bug related to Empath visualizations.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.3\u003c/h3\u003e\u003ca id=\"user-content-0023\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.3\" href=\"#0023\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAbility to see how a particular term is discussed differently between categories\nthrough the \u003ccode\u003eword_similarity_explorer\u003c/code\u003e function.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eSpecialized mode to view sparse term scores.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eFixed a bug that was caused by repeated values in background unigram 
counts.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded true alphabetical term sorting in visualizations.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eAdded an optional save-as-SVG button.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.2\u003c/h3\u003e\u003ca id=\"user-content-0022\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.2\" href=\"#0022\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded the option of showing characteristic terms (from the full set of documents being considered).\nThe option (\u003ccode\u003eshow_characteristic\u003c/code\u003e in \u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e) is on by default,\nbut currently unavailable for Chinese. If you know of a good Chinese wordcount list,\nplease let me know. 
The algorithm used to produce these is F-Score.\u003cbr\u003e\nSee \u003ca href=\"http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/58\" rel=\"nofollow\"\u003ethis and the following slide\u003c/a\u003e\nfor more details\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.1.5\u003c/h3\u003e\u003ca id=\"user-content-00215\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.1.5\" href=\"#00215\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded document and word count statistics to main visualization.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.1.4\u003c/h3\u003e\u003ca id=\"user-content-00214-1\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.1.4\" href=\"#00214-1\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 
1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAdded preliminary support for visualizing \u003ca href=\"https://github.com/Ejhfast/empath-client\"\u003eEmpath\u003c/a\u003e (Fast 2016) topics\ncategories instead of emotions. See the tutorial for more information.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.1.3\u003c/h3\u003e\u003ca id=\"user-content-00213\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.1.3\" href=\"#00213\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eImproved term-labeling.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.1.1\u003c/h3\u003e\u003ca id=\"user-content-00211\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.1.1\" href=\"#00211\"\u003e\u003csvg class=\"octicon octicon-link\" 
viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eAddition of \u003ccode\u003estrip_final_period\u003c/code\u003e param to \u003ccode\u003eFeatsFromSpacyDoc\u003c/code\u003e to deal with spaCy\ntokenization of all-caps documents that can leave periods at the end of terms.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch3 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003e0.0.2.1.0\u003c/h3\u003e\u003ca id=\"user-content-00210-1\" class=\"anchor\" aria-label=\"Permalink: 0.0.2.1.0\" href=\"#00210-1\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cp dir=\"auto\"\u003eI've 
added support for Chinese, including the ChineseNLP class, which uses a RegExp-based\nsentence splitter and \u003ca href=\"https://github.com/fxsjy/jieba\"\u003eJieba\u003c/a\u003e for word\nsegmentation. To use it, see the \u003ccode\u003edemo_chinese.py\u003c/code\u003e file. Note that \u003ccode\u003eCorpusFromPandas\u003c/code\u003e\ncurrently does not support ChineseNLP.\u003c/p\u003e\n\u003cp dir=\"auto\"\u003eIn order for the visualization to work, set the \u003ccode\u003easian_mode\u003c/code\u003e flag to \u003ccode\u003eTrue\u003c/code\u003e in\n\u003ccode\u003eproduce_scattertext_explorer\u003c/code\u003e.\u003c/p\u003e\n\u003cdiv class=\"markdown-heading\" dir=\"auto\"\u003e\u003ch2 tabindex=\"-1\" class=\"heading-element\" dir=\"auto\"\u003eSources\u003c/h2\u003e\u003ca id=\"user-content-sources\" class=\"anchor\" aria-label=\"Permalink: Sources\" href=\"#sources\"\u003e\u003csvg class=\"octicon octicon-link\" viewBox=\"0 0 16 16\" version=\"1.1\" width=\"16\" height=\"16\" aria-hidden=\"true\"\u003e\u003cpath d=\"m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z\"\u003e\u003c/path\u003e\u003c/svg\u003e\u003c/a\u003e\u003c/div\u003e\n\u003cul dir=\"auto\"\u003e\n\u003cli\u003e2012 Convention Data: scraped\nfrom \u003ca href=\"http://www.nytimes.com/interactive/2012/09/06/us/politics/convention-word-counts.html?_r=0\" rel=\"nofollow\"\u003eThe New York Times.\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003ecount_1w: Peter Norvig assembled this file (downloaded from \u003ca 
href=\"http://norvig.com/ngrams/count_1w.txt\" rel=\"nofollow\"\u003enorvig.com\u003c/a\u003e).\nSee \u003ca href=\"http://norvig.com/ngrams/\" rel=\"nofollow\"\u003ehttp://norvig.com/ngrams/\u003c/a\u003e for an explanation of how it was gathered from a very large corpus.\u003c/li\u003e\n\u003cli\u003ehamlet.txt: William Shakespeare. From \u003ca href=\"http://shakespeare.mit.edu/hamlet/full.html\" rel=\"nofollow\"\u003eshakespeare.mit.edu\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eInspiration for text scatter plots: Rudder, Christian. Dataclysm: Who We Are (When We Think No One's Looking). Random\nHouse Incorporated, 2014.\u003c/li\u003e\n\u003cli\u003eLoncaric, Calvin. \"Cozy: synthesizing collection data structures.\" Proceedings of the 2016 24th ACM SIGSOFT\nInternational Symposium on Foundations of Software Engineering. ACM, 2016.\u003c/li\u003e\n\u003cli\u003eFast, Ethan, Binbin Chen, and Michael S. Bernstein. \"Empath: Understanding topic signals in large-scale text.\"\nProceedings of the 2016 CHI Conference on Human Factors in Computing Systems. ACM, 2016.\u003c/li\u003e\n\u003cli\u003eBurt L. Monroe, Michael P. Colaresi, and Kevin M. Quinn. 2008. Fightin’ words: Lexical feature selection and\nevaluation for identifying the content of political conflict. Political Analysis.\u003c/li\u003e\n\u003cli\u003eBo Pang and Lillian Lee. A Sentimental Education: Sentiment Analysis Using Subjectivity Summarization Based on Minimum\nCuts, Proceedings of the ACL, 2004.\u003c/li\u003e\n\u003cli\u003eAbram Handler, Matt Denny, Hanna Wallach, and Brendan O'Connor. Bag of what? Simple noun phrase extraction for corpus\nanalysis. NLP+CSS Workshop at EMNLP 2016.\u003c/li\u003e\n\u003cli\u003ePeter Fankhauser, Jörg Knappen, Elke Teich. Exploring and visualizing variation in language resources. LREC 2014.\u003c/li\u003e\n\u003cli\u003eShinichi Nakagawa and Innes C. Cuthill. Effect size, confidence interval and statistical significance: a practical\nguide for biologists. 2007. 
In Biological Reviews 82.\u003c/li\u003e\n\u003cli\u003eCynthia M. Whissell. The dictionary of affect in language. 1993. In The Measurement of Emotions.\u003c/li\u003e\n\u003cli\u003eDavid Bamman, Jacob Eisenstein, and Tyler Schnoebelen. GENDER IDENTITY AND LEXICAL VARIATION IN SOCIAL MEDIA. 2014.\u003c/li\u003e\n\u003cli\u003eRada Mihalcea, Paul Tarau. TextRank: Bringing Order into Text. EMNLP. 2004.\u003c/li\u003e\n\u003cli\u003eFrimer, J. A., Boghrati, R., Haidt, J., Graham, J., \u0026amp; Dehgani, M. Moral Foundations Dictionary for Linguistic Analyses\n2.0. Unpublished manuscript. 2019.\u003c/li\u003e\n\u003cli\u003eJesse Graham, Jonathan Haidt, Sena Koleva, Matt Motyl, Ravi Iyer, Sean P Wojcik, and Peter H Ditto. 2013. Moral\nfoundations theory: The pragmatic validity of moral pluralism. Advances in Experimental Social Psychology, 47, 55-130\u003c/li\u003e\n\u003cli\u003eRyan J. Gallagher, Morgan R. Frank, Lewis Mitchell, Aaron J. Schwartz, Andrew J. Reagan, Christopher M. Danforth, and\nPeter Sheridan Dodds. Generalized Word Shift Graphs: A Method for Visualizing and Explaining Pairwise Comparisons\nBetween Texts. 2020. Arxiv. \u003ca href=\"https://arxiv.org/pdf/2008.02250.pdf\" rel=\"nofollow\"\u003ehttps://arxiv.org/pdf/2008.02250.pdf\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eKocoń, Jan; Zaśko-Zielińska, Monika and Miłkowski, Piotr, 2019, PolEmo 2.0 Sentiment Analysis Dataset for CoNLL,\nCLARIN-PL digital repository, \u003ca href=\"http://hdl.handle.net/11321/710\" rel=\"nofollow\"\u003ehttp://hdl.handle.net/11321/710\u003c/a\u003e.\u003c/li\u003e\n\u003cli\u003eGeorge Forman. 2008. BNS feature scaling: an improved representation over tf-idf for svm text classification. In\nProceedings of the 17th ACM conference on Information and knowledge management (CIKM '08). Association for Computing\nMachinery, New York, NY, USA, 263–270. 
\u003ca href=\"https://doi.org/10.1145/1458082.1458119\" rel=\"nofollow\"\u003ehttps://doi.org/10.1145/1458082.1458119\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eAnne-Kathrin Schumann. 2016. Brave new world: Uncovering topical dynamics in the ACL Anthology reference corpus using\nterm life cycle information. In Proceedings of the 10th SIGHUM Workshop on Language Technology for Cultural Heritage,\nSocial Sciences, and Humanities, pages 1–11, Berlin, Germany. Association for Computational Linguistics.\u003c/li\u003e\n\u003cli\u003ePiao, S. S., Bianchi, F., Dayrell, C., D’egidio, A., \u0026amp; Rayson, P. 2015. Development of the multilingual semantic\nannotation system. In Proceedings of the 2015 Conference of the North American Chapter of the Association for\nComputational Linguistics: Human Language Technologies (pp. 1268-1274).\u003c/li\u003e\n\u003cli\u003eCliff, N. (1993). Dominance statistics: Ordinal analyses to answer ordinal questions. Psychological Bulletin, 114(3), 494–509. \u003ca href=\"https://doi.org/10.1037/0033-2909.114.3.494\" rel=\"nofollow\"\u003ehttps://doi.org/10.1037/0033-2909.114.3.494\u003c/a\u003e\u003c/li\u003e\n\u003cli\u003eAltmann EG, Pierrehumbert JB, Motter AE (2011) Niche as a Determinant of Word Fate in Online Groups. PLoS ONE 6(5): e19009. 
\u003ca href=\"https://doi.org/10.1371/journal.pone.0019009\" rel=\"nofollow\"\u003ehttps://doi.org/10.1371/journal.pone.0019009\u003c/a\u003e.\u003c/li\u003e\n\u003c/ul\u003e\n\u003c/article\u003e","loaded":true,"timedOut":false,"errorMessage":null,"headerInfo":{"toc":[{"level":1,"text":"Scattertext 0.2.2","anchor":"scattertext-022","htmlText":"Scattertext 0.2.2"},{"level":2,"text":"Citation","anchor":"citation","htmlText":"Citation"},{"level":2,"text":"Installation","anchor":"installation","htmlText":"Installation"},{"level":2,"text":"Style Guide","anchor":"style-guide","htmlText":"Style Guide"},{"level":2,"text":"Overview","anchor":"overview","htmlText":"Overview"},{"level":2,"text":"Customizing the Visualization and Plotting Dispersion","anchor":"customizing-the-visualization-and-plotting-dispersion","htmlText":"Customizing the Visualization and Plotting Dispersion"},{"level":2,"text":"Tutorial","anchor":"tutorial","htmlText":"Tutorial"},{"level":3,"text":"Help! I don't know Python but I still want to use Scattertext.","anchor":"help-i-dont-know-python-but-i-still-want-to-use-scattertext","htmlText":"Help! 
I don't know Python but I still want to use Scattertext."},{"level":3,"text":"Using Scattertext as a text analysis library: finding characteristic terms and their associations","anchor":"using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations","htmlText":"Using Scattertext as a text analysis library: finding characteristic terms and their associations"},{"level":3,"text":"Visualizing term associations","anchor":"visualizing-term-associations","htmlText":"Visualizing term associations"},{"level":3,"text":"Visualizing Phrase associations","anchor":"visualizing-phrase-associations","htmlText":"Visualizing Phrase associations"},{"level":4,"text":"Using PyTextRank","anchor":"using-pytextrank","htmlText":"Using PyTextRank"},{"level":4,"text":"Using Phrasemachine to find phrases.","anchor":"using-phrasemachine-to-find-phrases","htmlText":"Using Phrasemachine to find phrases."},{"level":3,"text":"Adding color gradients to explain scores","anchor":"adding-color-gradients-to-explain-scores","htmlText":"Adding color gradients to explain scores"},{"level":3,"text":"Visualizing Empath topics and categories","anchor":"visualizing-empath-topics-and-categories","htmlText":"Visualizing Empath topics and categories"},{"level":3,"text":"Visualizing the Moral Foundations 2.0 Dictionary","anchor":"visualizing-the-moral-foundations-20-dictionary","htmlText":"Visualizing the Moral Foundations 2.0 Dictionary"},{"level":3,"text":"Ordering Terms by Corpus Characteristicness","anchor":"ordering-terms-by-corpus-characteristicness","htmlText":"Ordering Terms by Corpus Characteristicness"},{"level":3,"text":"Document-Based Scatterplots","anchor":"document-based-scatterplots","htmlText":"Document-Based Scatterplots"},{"level":3,"text":"Using Cohen's d or Hedge's g to visualize effect size.","anchor":"using-cohens-d-or-hedges-g-to-visualize-effect-size","htmlText":"Using Cohen's d or Hedge's g to visualize effect size."},{"level":3,"text":"Using Cliff's 
Delta to visualize effect size","anchor":"using-cliffs-delta-to-visualize-effect-size","htmlText":"Using Cliff's Delta to visualize effect size"},{"level":3,"text":"Using Bi-Normal Separation (BNS) to score terms","anchor":"using-bi-normal-separation-bns-to-score-terms","htmlText":"Using Bi-Normal Separation (BNS) to score terms"},{"level":3,"text":"Using correlations to explain classifiers","anchor":"using-correlations-to-explain-classifiers","htmlText":"Using correlations to explain classifiers"},{"level":3,"text":"Using Custom Background Word Frequencies","anchor":"using-custom-background-word-frequencies","htmlText":"Using Custom Background Word Frequencies"},{"level":3,"text":"Plotting word productivity","anchor":"plotting-word-productivity","htmlText":"Plotting word productivity"},{"level":3,"text":"Understanding Scaled F-Score","anchor":"understanding-scaled-f-score","htmlText":"Understanding Scaled F-Score"},{"level":3,"text":"Alternative term scoring methods","anchor":"alternative-term-scoring-methods","htmlText":"Alternative term scoring methods"},{"level":3,"text":"The position-select-plot process","anchor":"the-position-select-plot-process","htmlText":"The position-select-plot process"},{"level":2,"text":"Advanced uses","anchor":"advanced-uses","htmlText":"Advanced uses"},{"level":3,"text":"Visualizing differences based on only term frequencies","anchor":"visualizing-differences-based-on-only-term-frequencies","htmlText":"Visualizing differences based on only term frequencies"},{"level":3,"text":"Visualizing query-based categorical differences","anchor":"visualizing-query-based-categorical-differences","htmlText":"Visualizing query-based categorical differences"},{"level":4,"text":"Developing and using bespoke word representations","anchor":"developing-and-using-bespoke-word-representations","htmlText":"Developing and using bespoke word representations"},{"level":3,"text":"Visualizing any kind of term 
score","anchor":"visualizing-any-kind-of-term-score","htmlText":"Visualizing any kind of term score"},{"level":3,"text":"Custom term positions","anchor":"custom-term-positions","htmlText":"Custom term positions"},{"level":3,"text":"Emoji analysis","anchor":"emoji-analysis","htmlText":"Emoji analysis"},{"level":3,"text":"Visualizing SentencePiece Tokens","anchor":"visualizing-sentencepiece-tokens","htmlText":"Visualizing SentencePiece Tokens"},{"level":3,"text":"Visualizing scikit-learn text classification weights","anchor":"visualizing-scikit-learn-text-classification-weights","htmlText":"Visualizing scikit-learn text classification weights"},{"level":3,"text":"Creating lexicalized semiotic squares","anchor":"creating-lexicalized-semiotic-squares","htmlText":"Creating lexicalized semiotic squares"},{"level":3,"text":"Visualizing Topic Models","anchor":"visualizing-topic-models","htmlText":"Visualizing Topic Models"},{"level":4,"text":"Manually entered topic models","anchor":"manually-entered-topic-models","htmlText":"Manually entered topic models"},{"level":4,"text":"Using Scikit-Learn for Topic Modeling","anchor":"using-scikit-learn-for-topic-modeling","htmlText":"Using Scikit-Learn for Topic Modeling"},{"level":4,"text":"Using a Word List to Generate a Series of Topics","anchor":"using-a-word-list-to-generate-a-series-of-topics","htmlText":"Using a Word List to Generate a Series of Topics"},{"level":3,"text":"Creating T-SNE-style word embedding projection plots","anchor":"creating-t-sne-style-word-embedding-projection-plots","htmlText":"Creating T-SNE-style word embedding projection plots"},{"level":3,"text":"Using SVD to visualize any kind of word embeddings","anchor":"using-svd-to-visualize-any-kind-of-word-embeddings","htmlText":"Using SVD to visualize any kind of word embeddings"},{"level":3,"text":"Exporting plot to matplotlib","anchor":"exporting-plot-to-matplotlib","htmlText":"Exporting plot to 
matplotlib"},{"level":2,"text":"Examples","anchor":"examples","htmlText":"Examples"},{"level":2,"text":"A note on chart layout","anchor":"a-note-on-chart-layout","htmlText":"A note on chart layout"},{"level":2,"text":"What's new","anchor":"whats-new","htmlText":"What's new"},{"level":2,"text":"0.0.2.64","anchor":"00264","htmlText":"0.0.2.64"},{"level":2,"text":"0.0.2.60","anchor":"00260","htmlText":"0.0.2.60"},{"level":2,"text":"0.0.2.59","anchor":"00259","htmlText":"0.0.2.59"},{"level":2,"text":"0.0.2.57-58","anchor":"00257-58","htmlText":"0.0.2.57-58"},{"level":2,"text":"0.0.2.44:","anchor":"00244","htmlText":"0.0.2.44:"},{"level":2,"text":"0.0.2.43:","anchor":"00243","htmlText":"0.0.2.43:"},{"level":2,"text":"0.0.2.42:","anchor":"00242","htmlText":"0.0.2.42:"},{"level":2,"text":"0.0.2.41:","anchor":"00241","htmlText":"0.0.2.41:"},{"level":2,"text":"0.0.2.40:","anchor":"00240","htmlText":"0.0.2.40:"},{"level":2,"text":"0.0.2.39:","anchor":"00239","htmlText":"0.0.2.39:"},{"level":2,"text":"0.0.2.38:","anchor":"00238","htmlText":"0.0.2.38:"},{"level":2,"text":"0.0.2.37:","anchor":"00237","htmlText":"0.0.2.37:"},{"level":2,"text":"0.0.2.36:","anchor":"00236","htmlText":"0.0.2.36:"},{"level":2,"text":"0.0.2.35:","anchor":"00235","htmlText":"0.0.2.35:"},{"level":2,"text":"0.0.2.34:","anchor":"00234","htmlText":"0.0.2.34:"},{"level":2,"text":"0.0.2.33:","anchor":"00233","htmlText":"0.0.2.33:"},{"level":2,"text":"0.0.2.32","anchor":"00232","htmlText":"0.0.2.32"},{"level":2,"text":"0.0.28-31","anchor":"0028-31","htmlText":"0.0.28-31"},{"level":3,"text":"0.0.2.28","anchor":"00228","htmlText":"0.0.2.28"},{"level":3,"text":"0.0.2.26-27.1","anchor":"00226-271","htmlText":"0.0.2.26-27.1"},{"level":3,"text":"0.0.2.24-25","anchor":"00224-25","htmlText":"0.0.2.24-25"},{"level":3,"text":"0.0.2.16-23.1","anchor":"00216-231","htmlText":"0.0.2.16-23.1"},{"level":3,"text":"0.0.2.15-16","anchor":"00215-16","htmlText":"0.0.2.15-16"},{"level":3,"text":"0.0.2.14","anchor":"00214","htmlTex
t":"0.0.2.14"},{"level":3,"text":"0.0.2.11-13","anchor":"00211-13","htmlText":"0.0.2.11-13"},{"level":3,"text":"0.0.2.10","anchor":"00210","htmlText":"0.0.2.10"},{"level":3,"text":"0.0.2.9.*","anchor":"0029","htmlText":"0.0.2.9.*"},{"level":3,"text":"0.0.2.8.6","anchor":"00286","htmlText":"0.0.2.8.6"},{"level":3,"text":"0.0.2.8.5","anchor":"00285","htmlText":"0.0.2.8.5"},{"level":3,"text":"0.0.2.8.1-4","anchor":"00281-4","htmlText":"0.0.2.8.1-4"},{"level":3,"text":"0.0.2.8.0","anchor":"00280","htmlText":"0.0.2.8.0"},{"level":3,"text":"0.0.2.7.1","anchor":"00271","htmlText":"0.0.2.7.1"},{"level":3,"text":"0.0.2.7.0","anchor":"00270","htmlText":"0.0.2.7.0"},{"level":3,"text":"0.0.2.6.0","anchor":"00260-1","htmlText":"0.0.2.6.0"},{"level":3,"text":"0.0.2.5.0","anchor":"00250","htmlText":"0.0.2.5.0"},{"level":3,"text":"0.0.2.4.6","anchor":"00246","htmlText":"0.0.2.4.6"},{"level":3,"text":"0.0.2.4.5","anchor":"00245","htmlText":"0.0.2.4.5"},{"level":3,"text":"0.0.2.4.4","anchor":"00244-1","htmlText":"0.0.2.4.4"},{"level":3,"text":"0.0.2.4.2","anchor":"00242-1","htmlText":"0.0.2.4.2"},{"level":3,"text":"0.0.2.4.1","anchor":"00241-1","htmlText":"0.0.2.4.1"},{"level":3,"text":"0.0.2.4","anchor":"0024","htmlText":"0.0.2.4"},{"level":3,"text":"0.0.2.3","anchor":"0023","htmlText":"0.0.2.3"},{"level":3,"text":"0.0.2.2","anchor":"0022","htmlText":"0.0.2.2"},{"level":3,"text":"0.0.2.1.5","anchor":"00215","htmlText":"0.0.2.1.5"},{"level":3,"text":"0.0.2.1.4","anchor":"00214-1","htmlText":"0.0.2.1.4"},{"level":3,"text":"0.0.2.1.3","anchor":"00213","htmlText":"0.0.2.1.3"},{"level":3,"text":"0.0.2.1.1","anchor":"00211","htmlText":"0.0.2.1.1"},{"level":3,"text":"0.0.2.1.0","anchor":"00210-1","htmlText":"0.0.2.1.0"},{"level":2,"text":"Sources","anchor":"sources","htmlText":"Sources"}],"siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FJasonKessler%2Fscattertext"}},{"displayName":"LICENSE","repoName":"scattertext","refName":"master","path":"LICENSE","preferredFileType":"lic
ense","tabName":"Apache-2.0","richText":null,"loaded":false,"timedOut":false,"errorMessage":null,"headerInfo":{"toc":null,"siteNavLoginPath":"/login?return_to=https%3A%2F%2Fgithub.com%2FJasonKessler%2Fscattertext"}}],"overviewFilesProcessingTime":0}},"appPayload":{"helpUrl":"https://docs.github.com","findFileWorkerPath":"/assets-cdn/worker/find-file-worker-7d7eb7c71814.js","findInFileWorkerPath":"/assets-cdn/worker/find-in-file-worker-96e76d5fdb2c.js","githubDevUrl":null,"enabled_features":{"copilot_workspace":null,"code_nav_ui_events":false,"overview_shared_code_dropdown_button":false,"react_blob_overlay":false,"accessible_code_button":true,"github_models_repo_integration":false}}}}</script> <div data-target="react-partial.reactRoot"><style data-styled="true" data-styled-version="5.3.11">.iVEunk{margin-top:16px;margin-bottom:16px;}/*!sc*/ .jzuOtQ{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;-webkit-box-pack:justify;-webkit-justify-content:space-between;-ms-flex-pack:justify;justify-content:space-between;}/*!sc*/ .bGojzy{margin-bottom:0;display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;row-gap:16px;}/*!sc*/ .iNSVHo{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-pack:justify;-webkit-justify-content:space-between;-ms-flex-pack:justify;justify-content:space-between;-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;padding-bottom:16px;padding-top:8px;}/*!sc*/ .bVgnfw{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:row;-ms-flex-direction:row;flex-direction:row;gap:8px;}/*!sc*/ @media screen and (max-width:320px){.bVgnfw{-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}}/*!sc*/ .CEgMp{position:relative;}/*!sc*/ @media screen and 
(max-width:380px){.CEgMp .ref-selector-button-text-container{max-width:80px;}}/*!sc*/ @media screen and (max-width:320px){.CEgMp{-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}.CEgMp .overview-ref-selector{width:100%;}.CEgMp .overview-ref-selector > span{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-box-pack:start;-webkit-justify-content:flex-start;-ms-flex-pack:start;justify-content:flex-start;}.CEgMp .overview-ref-selector > span > span[data-component="text"]{-webkit-box-flex:1;-webkit-flex-grow:1;-ms-flex-positive:1;flex-grow:1;}}/*!sc*/ .gMOVLe[data-size="medium"]{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;min-width:0;}/*!sc*/ .gMOVLe[data-size="medium"] svg{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));}/*!sc*/ .gMOVLe[data-size="medium"] > span{width:inherit;}/*!sc*/ .gUkoLg{-webkit-box-pack:center;-webkit-justify-content:center;-ms-flex-pack:center;justify-content:center;}/*!sc*/ .bZBlpz{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;width:100%;}/*!sc*/ .lhTYNA{margin-right:4px;color:var(--fgColor-muted,var(--color-fg-muted,#656d76));}/*!sc*/ .ffLUq{font-size:14px;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;}/*!sc*/ .bmcJak{min-width:0;}/*!sc*/ .fLXEGX{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (max-width:1079px){.fLXEGX{display:none;}}/*!sc*/ .lmSMZJ[data-size="medium"]{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));padding-left:4px;padding-right:4px;}/*!sc*/ .lmSMZJ[data-size="medium"] span[data-component="leadingVisual"]{margin-right:4px !important;}/*!sc*/ .dqfxud{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (min-width:1080px){.dqfxud{display:none;}}/*!sc*/ @media screen and (max-width:543px){.dqfxud{display:none;}}/*!sc*/ 
.fGwBZA[data-size="medium"][data-no-visuals]{color:var(--fgColor-muted,var(--color-fg-muted,#656d76));}/*!sc*/ .jxTzTd{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;padding-left:8px;gap:8px;}/*!sc*/ .gqqBXN{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;gap:8px;}/*!sc*/ @media screen and (max-width:543px){.gqqBXN{display:none;}}/*!sc*/ .dzXgxt{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (max-width:1011px){.dzXgxt{display:none;}}/*!sc*/ .iWFGlI{margin-left:8px;margin-right:8px;margin:0;}/*!sc*/ .vcvyP{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;min-width:160px;}/*!sc*/ .YUPas{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (min-width:1012px){.YUPas{display:none;}}/*!sc*/ .izFOf{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;}/*!sc*/ @media screen and (min-width:544px){.izFOf{display:none;}}/*!sc*/ .vIPPs{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-direction:column;-ms-flex-direction:column;flex-direction:column;gap:16px;}/*!sc*/ .fdROMU{width:100%;border-collapse:separate;border-spacing:0;border:1px solid;border-color:var(--borderColor-default,var(--color-border-default,#d0d7de));border-radius:6px;table-layout:fixed;overflow:unset;}/*!sc*/ .jGKpsv{height:0px;line-height:0px;}/*!sc*/ .jGKpsv tr{height:0px;font-size:0px;}/*!sc*/ .jdgHnn{padding:16px;color:var(--fgColor-muted,var(--color-fg-muted,#656d76));font-size:12px;text-align:left;height:40px;}/*!sc*/ .jdgHnn th{padding-left:16px;background-color:var(--bgColor-muted,var(--color-canvas-subtle,#f6f8fa));}/*!sc*/ .bQivRW{width:100%;border-top-left-radius:6px;}/*!sc*/ @media screen and (min-width:544px){.bQivRW{display:none;}}/*!sc*/ .ldkMIO{width:40%;border-top-left-radius:6px;}/*!sc*/ @media screen and (max-width:543px){.ldkMIO{display:none;}}/*!sc*/ 
/*
 * Remaining Box styles (table header cell, "Latest commit" row and several
 * containers that the markup in this part of the page does not reference),
 * followed by the VisuallyHidden rule and the first half of the Tooltip rules.
 * One declaration per line. The "!sc" marker comments and the data-styled
 * bookkeeping rules are left exactly as found.
 *
 * .jMbWeI is the "Last commit date" header cell.
 */
.jMbWeI {
  text-align: right;
  padding-right: 16px;
  width: 136px;
  border-top-right-radius: 6px;
}/*!sc*/
/* .gpqjiB is the table row that holds the latest-commit summary. */
.gpqjiB {
  color: var(--fgColor-muted,var(--color-fg-muted,#656d76));
  font-size: 12px;
  height: 40px;
}/*!sc*/
/* .dzCJzi is the flex row inside that cell; it wraps below 544px and stays on
   one line from 544px upwards. */
.dzCJzi {
  display: -webkit-box;
  display: -webkit-flex;
  display: -ms-flexbox;
  display: flex;
  -webkit-flex-direction: row;
  -ms-flex-direction: row;
  flex-direction: row;
  -webkit-flex-wrap: wrap;
  -ms-flex-wrap: wrap;
  flex-wrap: wrap;
  -webkit-box-pack: justify;
  -webkit-justify-content: space-between;
  -ms-flex-pack: justify;
  justify-content: space-between;
  -webkit-align-items: center;
  -webkit-box-align: center;
  -ms-flex-align: center;
  align-items: center;
  gap: 8px;
  min-width: 273px;
  padding: 8px;
}/*!sc*/
@media screen and (min-width:544px) {
  .dzCJzi {
    -webkit-flex-wrap: nowrap;
    -ms-flex-wrap: nowrap;
    flex-wrap: nowrap;
  }
}/*!sc*/
/* "middle" replaces "center": vertical-align has no "center" keyword, so the
   declaration was being discarded by the browser.
   NOTE(review): no element in this part of the page carries .eNCcrz; confirm
   the target is a table cell or inline-level box, where vertical-align applies. */
.eNCcrz {
  text-align: center;
  vertical-align: middle;
  height: 40px;
  border-top: 1px solid;
  border-color: var(--borderColor-default,var(--color-border-default,#d0d7de));
}/*!sc*/
/* The inner var() now ends in the same #d0d7de literal used by every other
   border rule in this sheet, so the border still paints when neither custom
   property is defined. */
.bHTcCe {
  border-top: 1px solid var(--borderColor-default,var(--color-border-default,#d0d7de));
  cursor: pointer;
}/*!sc*/
.csrIcr {
  display: -webkit-box;
  display: -webkit-flex;
  display: -ms-flexbox;
  display: flex;
  -webkit-box-flex: 1;
  -webkit-flex-grow: 1;
  -ms-flex-positive: 1;
  flex-grow: 1;
  gap: 16px;
}/*!sc*/
/* .bUQNHB: bordered, rounded flex column. Up to 543px it bleeds 16px past its
   container on both sides; from 544px it is capped at the container width. */
.bUQNHB {
  border: 1px solid;
  border-color: var(--borderColor-default,var(--color-border-default,#d0d7de));
  border-radius: 6px;
  display: -webkit-box;
  display: -webkit-flex;
  display: -ms-flexbox;
  display: flex;
  -webkit-flex-direction: column;
  -ms-flex-direction: column;
  flex-direction: column;
  -webkit-box-flex: 1;
  -webkit-flex-grow: 1;
  -ms-flex-positive: 1;
  flex-grow: 1;
}/*!sc*/
@media screen and (max-width:543px) {
  .bUQNHB {
    margin-left: -16px;
    margin-right: -16px;
    max-width: calc(100% + 32px);
  }
}/*!sc*/
@media screen and (min-width:544px) {
  .bUQNHB {
    max-width: 100%;
  }
}/*!sc*/
/* .jPdcfu: sticky bar pinned to the top of its scroll container, with an
   opaque background and rounded top corners. */
.jPdcfu {
  display: -webkit-box;
  display: -webkit-flex;
  display: -ms-flexbox;
  display: flex;
  border-bottom: 1px solid;
  border-bottom-color: var(--borderColor-default,var(--color-border-default,#d0d7de));
  -webkit-align-items: center;
  -webkit-box-align: center;
  -ms-flex-align: center;
  align-items: center;
  padding-right: 8px;
  position: -webkit-sticky;
  position: sticky;
  top: 0;
  background-color: var(--bgColor-default,var(--color-canvas-default,#ffffff));
  z-index: 1;
  border-top-left-radius: 6px;
  border-top-right-radius: 6px;
}/*!sc*/
.iphEWz {
  -webkit-box-flex: 1;
  -webkit-flex-grow: 1;
  -ms-flex-positive: 1;
  flex-grow: 1;
  border-bottom: none;
  max-width: 100%;
  padding-left: 8px;
  padding-right: 8px;
}/*!sc*/
.hUCRAk {
  display: -webkit-box;
  display: -webkit-flex;
  display: -ms-flexbox;
  display: flex;
  -webkit-flex-direction: column;
  -ms-flex-direction: column;
  flex-direction: column;
  -webkit-align-items: center;
  -webkit-box-align: center;
  -ms-flex-align: center;
  align-items: center;
}/*!sc*/
.cwoBXV[data-size="medium"] {
  color: var(--fgColor-muted,var(--color-fg-subtle,#6e7781));
  padding-left: 8px;
  padding-right: 8px;
}/*!sc*/
.QkQOb {
  padding: 32px;
  overflow: auto;
}/*!sc*/
data-styled.g1[id="Box-sc-g0xbh4-0"]{content:"iVEunk,jzuOtQ,bGojzy,iNSVHo,bVgnfw,CEgMp,gMOVLe,gUkoLg,bZBlpz,lhTYNA,ffLUq,bmcJak,fLXEGX,lmSMZJ,dqfxud,fGwBZA,jxTzTd,gqqBXN,dzXgxt,iWFGlI,vcvyP,YUPas,izFOf,vIPPs,fdROMU,jGKpsv,jdgHnn,bQivRW,ldkMIO,jMbWeI,gpqjiB,dzCJzi,eNCcrz,bHTcCe,csrIcr,bUQNHB,jPdcfu,iphEWz,hUCRAk,cwoBXV,QkQOb,"}/*!sc*/
/* Visually hidden helper: a clipped 1x1px absolutely positioned box. */
.brGdpi {
  position: absolute;
  width: 1px;
  height: 1px;
  padding: 0;
  margin: -1px;
  overflow: hidden;
  -webkit-clip: rect(0,0,0,0);
  clip: rect(0,0,0,0);
  white-space: nowrap;
  border-width: 0;
}/*!sc*/
data-styled.g6[id="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0"]{content:"brGdpi,"}/*!sc*/
/* Tooltip host (.hWlpPn): the bubble is the ::after pseudo-element, which
   takes its text from the host's aria-label attribute. */
.hWlpPn {
  position: relative;
  display: inline-block;
}/*!sc*/
/* Bubble at rest: not rendered (display: none) and fully transparent.
   The font list is kept on a single line on purpose: a raw line break inside
   a quoted family name is not allowed in a CSS string and invalidates the
   declaration, so "Segoe UI Emoji" must not be split across lines. */
.hWlpPn::after {
  position: absolute;
  z-index: 1000000;
  display: none;
  padding: 0.5em 0.75em;
  font: normal normal 11px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI","Noto Sans",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji";
  -webkit-font-smoothing: subpixel-antialiased;
  color: var(--tooltip-fgColor,var(--fgColor-onEmphasis,var(--color-fg-on-emphasis,#ffffff)));
  text-align: center;
  -webkit-text-decoration: none;
  text-decoration: none;
  text-shadow: none;
  text-transform: none;
  -webkit-letter-spacing: normal;
  -moz-letter-spacing: normal;
  -ms-letter-spacing: normal;
  letter-spacing: normal;
  word-wrap: break-word;
  white-space: pre;
  pointer-events: none;
  content: attr(aria-label);
  background: var(--tooltip-bgColor,var(--bgColor-emphasis,var(--color-neutral-emphasis-plus,#24292f)));
  border-radius: 6px;
  opacity: 0;
}/*!sc*/
/* Fade-in used when the bubble is revealed (prefixed and unprefixed copies). */
@-webkit-keyframes tooltip-appear {
  from {
    opacity: 0;
  }
  to {
    opacity: 1;
  }
}/*!sc*/
@keyframes tooltip-appear {
  from {
    opacity: 0;
  }
  to {
    opacity: 1;
  }
}/*!sc*/
/* Reveal the bubble on hover, press, focus or focus-within and run the
   0.1s fade; "forwards" keeps the final opacity after the animation ends. */
.hWlpPn:hover::after,
.hWlpPn:active::after,
.hWlpPn:focus::after,
.hWlpPn:focus-within::after {
  display: inline-block;
  -webkit-text-decoration: none;
  text-decoration: none;
  -webkit-animation-name: tooltip-appear;
  animation-name: tooltip-appear;
  -webkit-animation-duration: 0.1s;
  animation-duration: 0.1s;
  -webkit-animation-fill-mode: forwards;
  animation-fill-mode: forwards;
  -webkit-animation-timing-function: ease-in;
  animation-timing-function: ease-in;
  -webkit-animation-delay: 0s;
  animation-delay: 0s;
}/*!sc*/
.hWlpPn.tooltipped-no-delay:hover::after,
.hWlpPn.tooltipped-no-delay:active::after,
.hWlpPn.tooltipped-no-delay:focus::after,
.hWlpPn.tooltipped-no-delay:focus-within::after {
  -webkit-animation-delay: 0s;
  animation-delay: 0s;
}/*!sc*/
.hWlpPn.tooltipped-multiline:hover::after,
.hWlpPn.tooltipped-multiline:active::after,
.hWlpPn.tooltipped-multiline:focus::after,
.hWlpPn.tooltipped-multiline:focus-within::after {
  display: table-cell;
}/*!sc*/
/* Placement below the host (s / se / sw variants). */
.hWlpPn.tooltipped-s::after,
.hWlpPn.tooltipped-se::after,
.hWlpPn.tooltipped-sw::after {
  top: 100%;
  right: 50%;
  margin-top: 6px;
}/*!sc*/
.hWlpPn.tooltipped-se::after {
  right: auto;
  left: 50%;
  margin-left: -16px;
}/*!sc*/
.hWlpPn.tooltipped-sw::after {
  margin-right: -16px;
}/*!sc*/
/*
 * Second half of the Tooltip rules (placement above and beside the host,
 * multiline and edge-aligned variants), then the Truncate rule. Same
 * declarations as before, one per line; the "!sc" marker comments and the
 * data-styled bookkeeping rules are left exactly as found.
 *
 * Placement above the host (n / ne / nw variants).
 */
.hWlpPn.tooltipped-n::after,
.hWlpPn.tooltipped-ne::after,
.hWlpPn.tooltipped-nw::after {
  right: 50%;
  bottom: 100%;
  margin-bottom: 6px;
}/*!sc*/
.hWlpPn.tooltipped-ne::after {
  right: auto;
  left: 50%;
  margin-left: -16px;
}/*!sc*/
.hWlpPn.tooltipped-nw::after {
  margin-right: -16px;
}/*!sc*/
/* s and n bubbles sit at right: 50%; shifting them by half their own width
   centres them on the host. */
.hWlpPn.tooltipped-s::after,
.hWlpPn.tooltipped-n::after {
  -webkit-transform: translateX(50%);
  -ms-transform: translateX(50%);
  transform: translateX(50%);
}/*!sc*/
/* Placement to the left (w) and right (e) of the host, vertically centred. */
.hWlpPn.tooltipped-w::after {
  right: 100%;
  bottom: 50%;
  margin-right: 6px;
  -webkit-transform: translateY(50%);
  -ms-transform: translateY(50%);
  transform: translateY(50%);
}/*!sc*/
.hWlpPn.tooltipped-e::after {
  bottom: 50%;
  left: 100%;
  margin-left: 6px;
  -webkit-transform: translateY(50%);
  -ms-transform: translateY(50%);
  transform: translateY(50%);
}/*!sc*/
/* Multiline bubbles: sized to their content up to 250px and allowed to wrap. */
.hWlpPn.tooltipped-multiline::after {
  width: -webkit-max-content;
  width: -moz-max-content;
  width: max-content;
  max-width: 250px;
  word-wrap: break-word;
  white-space: pre-line;
  border-collapse: separate;
}/*!sc*/
.hWlpPn.tooltipped-multiline.tooltipped-s::after,
.hWlpPn.tooltipped-multiline.tooltipped-n::after {
  right: auto;
  left: 50%;
  -webkit-transform: translateX(-50%);
  -ms-transform: translateX(-50%);
  transform: translateX(-50%);
}/*!sc*/
.hWlpPn.tooltipped-multiline.tooltipped-w::after,
.hWlpPn.tooltipped-multiline.tooltipped-e::after {
  right: 100%;
}/*!sc*/
/* Edge-aligned variants pin the bubble to the host's right or left edge. */
.hWlpPn.tooltipped-align-right-2::after {
  right: 0;
  margin-right: 0;
}/*!sc*/
.hWlpPn.tooltipped-align-left-2::after {
  left: 0;
  margin-left: 0;
}/*!sc*/
data-styled.g17[id="Tooltip__TooltipBase-sc-17tf59c-0"]{content:"hWlpPn,"}/*!sc*/
/* Truncate (.liVpTx): single-line inline block, clipped with an ellipsis
   once it reaches 125px. Used on the table's column-title wrappers. */
.liVpTx {
  display: inline-block;
  overflow: hidden;
  text-overflow: ellipsis;
  vertical-align: top;
  white-space: nowrap;
  max-width: 125px;
}/*!sc*/
data-styled.g19[id="Truncate__StyledTruncate-sc-23o1d2-0"]{content:"liVpTx,"}/*!sc*/
</style> <!-- --> <!-- --> <div class="Box-sc-g0xbh4-0 iVEunk"><div class="Box-sc-g0xbh4-0 jzuOtQ"><div class="Box-sc-g0xbh4-0 bGojzy"></div></div><div class="Box-sc-g0xbh4-0 iNSVHo"><div class="Box-sc-g0xbh4-0 bVgnfw"><div class="Box-sc-g0xbh4-0 CEgMp"><button 
type="button" aria-haspopup="true" aria-expanded="false" tabindex="0" aria-label="master branch" data-testid="anchor-button" class="Box-sc-g0xbh4-0 gMOVLe prc-Button-ButtonBase-c50BI overview-ref-selector width-full" data-loading="false" data-size="medium" data-variant="default" aria-describedby="branch-picker-repos-header-ref-selector-loading-announcement" id="branch-picker-repos-header-ref-selector"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="text" class="prc-Button-Label-pTQ3x"><div class="Box-sc-g0xbh4-0 bZBlpz"><div class="Box-sc-g0xbh4-0 lhTYNA"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></div><div class="Box-sc-g0xbh4-0 ffLUq ref-selector-button-text-container"><span class="Box-sc-g0xbh4-0 bmcJak prc-Text-Text-0ima0"> <!-- -->master</span></div></div></span><span data-component="trailingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-triangle-down" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m4.427 7.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.396 7H4.604a.25.25 0 0 0-.177.427Z"></path></svg></span></span></button><button hidden="" data-hotkey-scope="read-only-cursor-text-area"></button></div><div class="Box-sc-g0xbh4-0 fLXEGX"><a 
style="--button-color:fg.muted" type="button" href="/JasonKessler/scattertext/branches" class="Box-sc-g0xbh4-0 lmSMZJ prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rclab:-loading-announcement"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Branches</span></span></a><a style="--button-color:fg.muted" type="button" href="/JasonKessler/scattertext/tags" class="Box-sc-g0xbh4-0 lmSMZJ prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rklab:-loading-announcement"><span data-component="buttonContent" class="Box-sc-g0xbh4-0 gUkoLg prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-tag" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 
.066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Tags</span></span></a></div><div class="Box-sc-g0xbh4-0 dqfxud"><a style="--button-color:fg.muted" type="button" aria-label="Go to Branches page" href="/JasonKessler/scattertext/branches" class="Box-sc-g0xbh4-0 fGwBZA prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="invisible" aria-describedby=":Relab:-loading-announcement"><svg aria-hidden="true" focusable="false" class="octicon octicon-git-branch" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M9.5 3.25a2.25 2.25 0 1 1 3 2.122V6A2.5 2.5 0 0 1 10 8.5H6a1 1 0 0 0-1 1v1.128a2.251 2.251 0 1 1-1.5 0V5.372a2.25 2.25 0 1 1 1.5 0v1.836A2.493 2.493 0 0 1 6 7h4a1 1 0 0 0 1-1v-.628A2.25 2.25 0 0 1 9.5 3.25Zm-6 0a.75.75 0 1 0 1.5 0 .75.75 0 0 0-1.5 0Zm8.25-.75a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5ZM4.25 12a.75.75 0 1 0 0 1.5.75.75 0 0 0 0-1.5Z"></path></svg></a><a style="--button-color:fg.muted" type="button" aria-label="Go to Tags page" href="/JasonKessler/scattertext/tags" class="Box-sc-g0xbh4-0 fGwBZA prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="invisible" aria-describedby=":Rmlab:-loading-announcement"><svg aria-hidden="true" focusable="false" class="octicon octicon-tag" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 
0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path></svg></a></div></div><div class="Box-sc-g0xbh4-0 jxTzTd"><div class="Box-sc-g0xbh4-0 gqqBXN"><div class="Box-sc-g0xbh4-0 dzXgxt"><!--$--><div class="Box-sc-g0xbh4-0 iWFGlI"><span class="Box-sc-g0xbh4-0 vcvyP TextInput-wrapper prc-components-TextInputWrapper-i1ofR prc-components-TextInputBaseWrapper-ueK9q" data-leading-visual="true" data-trailing-visual="true" aria-busy="false"><span class="TextInput-icon" id=":R2j5ab:" aria-hidden="true"><svg aria-hidden="true" focusable="false" class="octicon octicon-search" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M10.68 11.74a6 6 0 0 1-7.922-8.982 6 6 0 0 1 8.982 7.922l3.04 3.04a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215ZM11.5 7a4.499 4.499 0 1 0-8.997 0A4.499 4.499 0 0 0 11.5 7Z"></path></svg></span><input type="text" aria-label="Go to file" role="combobox" aria-controls="file-results-list" aria-expanded="false" aria-haspopup="dialog" autoCorrect="off" spellcheck="false" placeholder="Go to file" aria-describedby=":R2j5ab: :R2j5abH1:" data-component="input" class="prc-components-Input-Ic-y8" value=""/><span class="TextInput-icon" id=":R2j5abH1:" aria-hidden="true"></span></span></div><!--/$--></div><div class="Box-sc-g0xbh4-0 YUPas"><button type="button" class="prc-Button-ButtonBase-c50BI" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="default" aria-describedby=":Rr5ab:-loading-announcement"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="text" class="prc-Button-Label-pTQ3x">Go to file</span></span></button></div><div class="react-directory-add-file-icon"></div><div class="react-directory-remove-file-icon"></div></div><button type="button" aria-haspopup="true" aria-expanded="false" 
tabindex="0" class="prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="primary" aria-describedby=":R55ab:-loading-announcement" id=":R55ab:"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-code hide-sm" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m11.28 3.22 4.25 4.25a.75.75 0 0 1 0 1.06l-4.25 4.25a.749.749 0 0 1-1.275-.326.749.749 0 0 1 .215-.734L13.94 8l-3.72-3.72a.749.749 0 0 1 .326-1.275.749.749 0 0 1 .734.215Zm-6.56 0a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042L2.06 8l3.72 3.72a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L.47 8.53a.75.75 0 0 1 0-1.06Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x">Code</span><span data-component="trailingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-triangle-down" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m4.427 7.427 3.396 3.396a.25.25 0 0 0 .354 0l3.396-3.396A.25.25 0 0 0 11.396 7H4.604a.25.25 0 0 0-.177.427Z"></path></svg></span></span></button><div class="Box-sc-g0xbh4-0 izFOf"><button data-component="IconButton" type="button" aria-label="Open more actions menu" aria-haspopup="true" aria-expanded="false" tabindex="0" class="prc-Button-ButtonBase-c50BI prc-Button-IconButton-szpyj" data-loading="false" data-no-visuals="true" data-size="medium" data-variant="default" aria-describedby=":R75ab:-loading-announcement" id=":R75ab:"><svg aria-hidden="true" focusable="false" class="octicon octicon-kebab-horizontal" viewBox="0 0 16 16" width="16" height="16" 
fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M8 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3ZM1.5 9a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Zm13 0a1.5 1.5 0 1 0 0-3 1.5 1.5 0 0 0 0 3Z"></path></svg></button></div></div></div><div class="Box-sc-g0xbh4-0 vIPPs"><div data-hpc="true"><button hidden="" data-testid="focus-next-element-button" data-hotkey="j"></button><button hidden="" data-testid="focus-previous-element-button" data-hotkey="k"></button><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading" id="folders-and-files">Folders and files</h2><table aria-labelledby="folders-and-files" class="Box-sc-g0xbh4-0 fdROMU"><thead class="Box-sc-g0xbh4-0 jGKpsv"><tr class="Box-sc-g0xbh4-0 jdgHnn"><th colSpan="2" class="Box-sc-g0xbh4-0 bQivRW"><span class="text-bold">Name</span></th><th colSpan="1" class="Box-sc-g0xbh4-0 ldkMIO"><span class="text-bold">Name</span></th><th class="hide-sm"><div title="Last commit message" class="Truncate__StyledTruncate-sc-23o1d2-0 liVpTx width-fit"><span class="text-bold">Last commit message</span></div></th><th colSpan="1" class="Box-sc-g0xbh4-0 jMbWeI"><div title="Last commit date" class="Truncate__StyledTruncate-sc-23o1d2-0 liVpTx width-fit"><span class="text-bold">Last commit date</span></div></th></tr></thead><tbody><tr class="Box-sc-g0xbh4-0 gpqjiB"><td colSpan="3" class="bgColor-muted p-1 rounded-top-2"><div class="Box-sc-g0xbh4-0 dzCJzi"><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading">Latest commit</h2><div style="width:120px" class="Skeleton Skeleton--text" data-testid="loading"> </div><div class="d-flex flex-shrink-0 gap-2"><div data-testid="latest-commit-details" class="d-none d-sm-flex flex-items-center"></div><div class="d-flex gap-2"><h2 class="sr-only ScreenReaderHeading-module__userSelectNone--vW4Cq 
prc-Heading-Heading-6CmGO" data-testid="screen-reader-heading">History</h2><a href="/JasonKessler/scattertext/commits/master/" class="prc-Button-ButtonBase-c50BI d-none d-lg-flex LinkButton-module__code-view-link-button--xvCGA flex-items-center fgColor-default" data-loading="false" data-size="small" data-variant="invisible" aria-describedby=":Raqj8pab:-loading-announcement"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" class="octicon octicon-history" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path></svg></span><span data-component="text" class="prc-Button-Label-pTQ3x"><span class="fgColor-default">389 Commits</span></span></span></a><div class="d-sm-none"></div><div class="d-flex d-lg-none"><span role="tooltip" aria-label="389 Commits" id="history-icon-button-tooltip" class="Tooltip__TooltipBase-sc-17tf59c-0 hWlpPn tooltipped-n"><a href="/JasonKessler/scattertext/commits/master/" class="prc-Button-ButtonBase-c50BI LinkButton-module__code-view-link-button--xvCGA flex-items-center fgColor-default" data-loading="false" data-size="small" data-variant="invisible" aria-describedby=":R1iqj8pab:-loading-announcement history-icon-button-tooltip"><span data-component="buttonContent" data-align="center" class="prc-Button-ButtonContent-HKbr-"><span data-component="leadingVisual" class="prc-Button-Visual-2epfX prc-Button-VisualWrap-Db-eB"><svg aria-hidden="true" focusable="false" 
class="octicon octicon-history" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="m.427 1.927 1.215 1.215a8.002 8.002 0 1 1-1.6 5.685.75.75 0 1 1 1.493-.154 6.5 6.5 0 1 0 1.18-4.458l1.358 1.358A.25.25 0 0 1 3.896 6H.25A.25.25 0 0 1 0 5.75V2.104a.25.25 0 0 1 .427-.177ZM7.75 4a.75.75 0 0 1 .75.75v2.992l2.028.812a.75.75 0 0 1-.557 1.392l-2.5-1A.751.751 0 0 1 7 8.25v-3.5A.75.75 0 0 1 7.75 4Z"></path></svg></span></span></a></span></div></div></div></div></td></tr><tr class="react-directory-row undefined" id="folder-row-0"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="scattertext" aria-label="scattertext, (Directory)" class="Link--primary" href="/JasonKessler/scattertext/tree/master/scattertext">scattertext</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file-directory-fill icon-directory" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M1.75 1A1.75 1.75 0 0 0 0 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0 0 16 13.25v-8.5A1.75 1.75 0 0 0 14.25 3H7.5a.25.25 0 0 1-.2-.1l-.9-1.2C6.07 1.26 5.55 1 
5 1H1.75Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="scattertext" aria-label="scattertext, (Directory)" class="Link--primary" href="/JasonKessler/scattertext/tree/master/scattertext">scattertext</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-1"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitattributes" aria-label=".gitattributes, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/.gitattributes">.gitattributes</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 
1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".gitattributes" aria-label=".gitattributes, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/.gitattributes">.gitattributes</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-2"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".travis.yml" aria-label=".travis.yml, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/.travis.yml">.travis.yml</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 
16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title=".travis.yml" aria-label=".travis.yml, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/.travis.yml">.travis.yml</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-3"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="ISSUE_TEMPLATE" aria-label="ISSUE_TEMPLATE, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/ISSUE_TEMPLATE">ISSUE_TEMPLATE</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="ISSUE_TEMPLATE" aria-label="ISSUE_TEMPLATE, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/ISSUE_TEMPLATE">ISSUE_TEMPLATE</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-4"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="LICENSE" aria-label="LICENSE, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/LICENSE">LICENSE</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="LICENSE" aria-label="LICENSE, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/LICENSE">LICENSE</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-5"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 
1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="MANIFEST.in" aria-label="MANIFEST.in, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/MANIFEST.in">MANIFEST.in</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="MANIFEST.in" aria-label="MANIFEST.in, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/MANIFEST.in">MANIFEST.in</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-6"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" 
fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="PhraseMachineLicense.txt" aria-label="PhraseMachineLicense.txt, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/PhraseMachineLicense.txt">PhraseMachineLicense.txt</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="PhraseMachineLicense.txt" aria-label="PhraseMachineLicense.txt, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/PhraseMachineLicense.txt">PhraseMachineLicense.txt</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> 
</div></td></tr><tr class="react-directory-row undefined" id="folder-row-7"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README.md" aria-label="README.md, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/README.md">README.md</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="README.md" aria-label="README.md, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/README.md">README.md</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-8"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo.py" aria-label="demo.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo.py">demo.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo.py" aria-label="demo.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo.py">demo.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row undefined" id="folder-row-9"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_alt_tokenization.py" aria-label="demo_alt_tokenization.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_alt_tokenization.py">demo_alt_tokenization.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 
.909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_alt_tokenization.py" aria-label="demo_alt_tokenization.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_alt_tokenization.py">demo_alt_tokenization.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-10"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_axis_crossbars_and_labels.py" aria-label="demo_axis_crossbars_and_labels.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_axis_crossbars_and_labels.py">demo_axis_crossbars_and_labels.py</a></div></div></div></div></td><td 
class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_axis_crossbars_and_labels.py" aria-label="demo_axis_crossbars_and_labels.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_axis_crossbars_and_labels.py">demo_axis_crossbars_and_labels.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-11"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_beta_posterior.py" aria-label="demo_beta_posterior.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_beta_posterior.py">demo_beta_posterior.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_beta_posterior.py" aria-label="demo_beta_posterior.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_beta_posterior.py">demo_beta_posterior.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-12"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_bi_normal_separation.py" aria-label="demo_bi_normal_separation.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_bi_normal_separation.py">demo_bi_normal_separation.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_bi_normal_separation.py" aria-label="demo_bi_normal_separation.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_bi_normal_separation.py">demo_bi_normal_separation.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr 
class="react-directory-row truncate-for-mobile" id="folder-row-13"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_bm25.py" aria-label="demo_bm25.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_bm25.py">demo_bm25.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_bm25.py" aria-label="demo_bm25.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_bm25.py">demo_bm25.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-14"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_bow_pca.py" aria-label="demo_bow_pca.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_bow_pca.py">demo_bow_pca.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_bow_pca.py" aria-label="demo_bow_pca.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_bow_pca.py">demo_bow_pca.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-15"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_category_frequencies.py" aria-label="demo_category_frequencies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_category_frequencies.py">demo_category_frequencies.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_category_frequencies.py" aria-label="demo_category_frequencies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_category_frequencies.py">demo_category_frequencies.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-16"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_characteristic_chart.py" aria-label="demo_characteristic_chart.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_characteristic_chart.py">demo_characteristic_chart.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_characteristic_chart.py" aria-label="demo_characteristic_chart.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_characteristic_chart.py">demo_characteristic_chart.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-17"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 
0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_chinese.py" aria-label="demo_chinese.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_chinese.py">demo_chinese.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_chinese.py" aria-label="demo_chinese.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_chinese.py">demo_chinese.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-18"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cliffs_delta.py" aria-label="demo_cliffs_delta.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cliffs_delta.py">demo_cliffs_delta.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cliffs_delta.py" aria-label="demo_cliffs_delta.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cliffs_delta.py">demo_cliffs_delta.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-19"><td 
class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cognitive_distortions.py" aria-label="demo_cognitive_distortions.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cognitive_distortions.py">demo_cognitive_distortions.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cognitive_distortions.py" aria-label="demo_cognitive_distortions.py, (File)" 
class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cognitive_distortions.py">demo_cognitive_distortions.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-20"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cohens_d.py" aria-label="demo_cohens_d.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cohens_d.py">demo_cohens_d.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 
.25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cohens_d.py" aria-label="demo_cohens_d.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cohens_d.py">demo_cohens_d.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-21"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_compact.py" aria-label="demo_compact.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_compact.py">demo_compact.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_compact.py" aria-label="demo_compact.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_compact.py">demo_compact.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-22"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_compact_suppress_documents.py" aria-label="demo_compact_suppress_documents.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_compact_suppress_documents.py">demo_compact_suppress_documents.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_compact_suppress_documents.py" aria-label="demo_compact_suppress_documents.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_compact_suppress_documents.py">demo_compact_suppress_documents.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-23"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 
0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_correlation_pearsons.py" aria-label="demo_correlation_pearsons.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_correlation_pearsons.py">demo_correlation_pearsons.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_correlation_pearsons.py" aria-label="demo_correlation_pearsons.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_correlation_pearsons.py">demo_correlation_pearsons.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-24"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon 
octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_craigs_zeta.py" aria-label="demo_craigs_zeta.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_craigs_zeta.py">demo_craigs_zeta.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_craigs_zeta.py" aria-label="demo_craigs_zeta.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_craigs_zeta.py">demo_craigs_zeta.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div 
class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-25"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_cred_tfidf.py" aria-label="demo_cred_tfidf.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cred_tfidf.py">demo_cred_tfidf.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a 
title="demo_cred_tfidf.py" aria-label="demo_cred_tfidf.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_cred_tfidf.py">demo_cred_tfidf.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-26"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_custom_coordinates.py" aria-label="demo_custom_coordinates.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_custom_coordinates.py">demo_custom_coordinates.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 
14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_custom_coordinates.py" aria-label="demo_custom_coordinates.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_custom_coordinates.py">demo_custom_coordinates.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-27"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_custom_topic_model.py" aria-label="demo_custom_topic_model.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_custom_topic_model.py">demo_custom_topic_model.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" 
class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_custom_topic_model.py" aria-label="demo_custom_topic_model.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_custom_topic_model.py">demo_custom_topic_model.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-28"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_deltajsd.py" 
aria-label="demo_deltajsd.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_deltajsd.py">demo_deltajsd.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_deltajsd.py" aria-label="demo_deltajsd.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_deltajsd.py">demo_deltajsd.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-29"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 
.25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dense_rank.py" aria-label="demo_dense_rank.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dense_rank.py">demo_dense_rank.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dense_rank.py" aria-label="demo_dense_rank.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dense_rank.py">demo_dense_rank.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-30"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" 
overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dense_rank_difference.py" aria-label="demo_dense_rank_difference.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dense_rank_difference.py">demo_dense_rank_difference.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dense_rank_difference.py" aria-label="demo_dense_rank_difference.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dense_rank_difference.py">demo_dense_rank_difference.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> 
</div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-31"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dispersion.py" aria-label="demo_dispersion.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dispersion.py">demo_dispersion.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dispersion.py" aria-label="demo_dispersion.py, 
(File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dispersion.py">demo_dispersion.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-32"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dispersion_basic.py" aria-label="demo_dispersion_basic.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dispersion_basic.py">demo_dispersion_basic.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 
.25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dispersion_basic.py" aria-label="demo_dispersion_basic.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dispersion_basic.py">demo_dispersion_basic.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-33"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dissemination.py" aria-label="demo_dissemination.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dissemination.py">demo_dissemination.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" 
display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_dissemination.py" aria-label="demo_dissemination.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_dissemination.py">demo_dissemination.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-34"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_embeddings_pca.py" aria-label="demo_embeddings_pca.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_embeddings_pca.py">demo_embeddings_pca.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_embeddings_pca.py" aria-label="demo_embeddings_pca.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_embeddings_pca.py">demo_embeddings_pca.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-35"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 
1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_emoji.py" aria-label="demo_emoji.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_emoji.py">demo_emoji.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_emoji.py" aria-label="demo_emoji.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_emoji.py">demo_emoji.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-36"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 
3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_empath.py" aria-label="demo_empath.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_empath.py">demo_empath.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_empath.py" aria-label="demo_empath.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_empath.py">demo_empath.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-37"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg 
aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_eta_da.py" aria-label="demo_eta_da.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_eta_da.py">demo_eta_da.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_eta_da.py" aria-label="demo_eta_da.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_eta_da.py">demo_eta_da.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> 
</div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-38"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_expected_vs_actual.py" aria-label="demo_expected_vs_actual.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_expected_vs_actual.py">demo_expected_vs_actual.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div 
class="react-directory-truncate"><a title="demo_expected_vs_actual.py" aria-label="demo_expected_vs_actual.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_expected_vs_actual.py">demo_expected_vs_actual.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-39"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_feature_importance.py" aria-label="demo_feature_importance.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_feature_importance.py">demo_feature_importance.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 
1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_feature_importance.py" aria-label="demo_feature_importance.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_feature_importance.py">demo_feature_importance.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-40"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_flashtext.py" aria-label="demo_flashtext.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_flashtext.py">demo_flashtext.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg 
aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_flashtext.py" aria-label="demo_flashtext.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_flashtext.py">demo_flashtext.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-41"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_focused_pair_plot_movies.py" 
aria-label="demo_focused_pair_plot_movies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_focused_pair_plot_movies.py">demo_focused_pair_plot_movies.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_focused_pair_plot_movies.py" aria-label="demo_focused_pair_plot_movies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_focused_pair_plot_movies.py">demo_focused_pair_plot_movies.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-42"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 
13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_foreign_characteristic_frequencies.py" aria-label="demo_foreign_characteristic_frequencies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_foreign_characteristic_frequencies.py">demo_foreign_characteristic_frequencies.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_foreign_characteristic_frequencies.py" aria-label="demo_foreign_characteristic_frequencies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_foreign_characteristic_frequencies.py">demo_foreign_characteristic_frequencies.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" 
id="folder-row-43"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_four_square.py" aria-label="demo_four_square.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_four_square.py">demo_four_square.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_four_square.py" aria-label="demo_four_square.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_four_square.py">demo_four_square.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-44"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_g2.py" aria-label="demo_g2.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_g2.py">demo_g2.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_g2.py" aria-label="demo_g2.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_g2.py">demo_g2.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-45"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_general_inquirer.py" aria-label="demo_general_inquirer.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_general_inquirer.py">demo_general_inquirer.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 
0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_general_inquirer.py" aria-label="demo_general_inquirer.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_general_inquirer.py">demo_general_inquirer.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-46"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_general_inquirer_frequency_plot.py" aria-label="demo_general_inquirer_frequency_plot.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_general_inquirer_frequency_plot.py">demo_general_inquirer_frequency_plot.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_general_inquirer_frequency_plot.py" aria-label="demo_general_inquirer_frequency_plot.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_general_inquirer_frequency_plot.py">demo_general_inquirer_frequency_plot.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-47"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 
14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_gensim_similarity.py" aria-label="demo_gensim_similarity.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_gensim_similarity.py">demo_gensim_similarity.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_gensim_similarity.py" aria-label="demo_gensim_similarity.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_gensim_similarity.py">demo_gensim_similarity.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-48"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" 
class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_global_scale_log.py" aria-label="demo_global_scale_log.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_global_scale_log.py">demo_global_scale_log.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_global_scale_log.py" aria-label="demo_global_scale_log.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_global_scale_log.py">demo_global_scale_log.py</a></div></div></div></div></td><td 
class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-49"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_gradient.py" aria-label="demo_gradient.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_gradient.py">demo_gradient.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div 
class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_gradient.py" aria-label="demo_gradient.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_gradient.py">demo_gradient.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-50"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_hedges_g.py" aria-label="demo_hedges_g.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_hedges_g.py">demo_hedges_g.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 
16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_hedges_g.py" aria-label="demo_hedges_g.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_hedges_g.py">demo_hedges_g.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-51"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_ignore_categories.py" aria-label="demo_ignore_categories.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_ignore_categories.py">demo_ignore_categories.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon 
octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_ignore_categories.py" aria-label="demo_ignore_categories.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_ignore_categories.py">demo_ignore_categories.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-52"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_include_all_contexts.py" 
aria-label="demo_include_all_contexts.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_include_all_contexts.py">demo_include_all_contexts.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_include_all_contexts.py" aria-label="demo_include_all_contexts.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_include_all_contexts.py">demo_include_all_contexts.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-53"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 
14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_insignificant_greyed_out.py" aria-label="demo_insignificant_greyed_out.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_insignificant_greyed_out.py">demo_insignificant_greyed_out.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_insignificant_greyed_out.py" aria-label="demo_insignificant_greyed_out.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_insignificant_greyed_out.py">demo_insignificant_greyed_out.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-54"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div 
class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_japanese.py" aria-label="demo_japanese.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_japanese.py">demo_japanese.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_japanese.py" aria-label="demo_japanese.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_japanese.py">demo_japanese.py</a></div></div></div></div></td><td 
class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-55"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_label_coloring.py" aria-label="demo_label_coloring.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_label_coloring.py">demo_label_coloring.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div 
class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_label_coloring.py" aria-label="demo_label_coloring.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_label_coloring.py">demo_label_coloring.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-56"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_lemmas.py" aria-label="demo_lemmas.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_lemmas.py">demo_lemmas.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 
2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_lemmas.py" aria-label="demo_lemmas.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_lemmas.py">demo_lemmas.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-57"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_log_odds_ratio_prior.py" aria-label="demo_log_odds_ratio_prior.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_log_odds_ratio_prior.py">demo_log_odds_ratio_prior.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div 
class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_log_odds_ratio_prior.py" aria-label="demo_log_odds_ratio_prior.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_log_odds_ratio_prior.py">demo_log_odds_ratio_prior.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-58"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div 
class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_log_relative_risk.py" aria-label="demo_log_relative_risk.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_log_relative_risk.py">demo_log_relative_risk.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_log_relative_risk.py" aria-label="demo_log_relative_risk.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_log_relative_risk.py">demo_log_relative_risk.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-59"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 
1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_log_scale.py" aria-label="demo_log_scale.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_log_scale.py">demo_log_scale.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_log_scale.py" aria-label="demo_log_scale.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_log_scale.py">demo_log_scale.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-60"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg 
aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_lrc.py" aria-label="demo_lrc.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_lrc.py">demo_lrc.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_lrc.py" aria-label="demo_lrc.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_lrc.py">demo_lrc.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div 
class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-61"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_lrc_movies.py" aria-label="demo_lrc_movies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_lrc_movies.py">demo_lrc_movies.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a 
title="demo_lrc_movies.py" aria-label="demo_lrc_movies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_lrc_movies.py">demo_lrc_movies.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-62"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_mann_whitney.py" aria-label="demo_mann_whitney.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_mann_whitney.py">demo_mann_whitney.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 
0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_mann_whitney.py" aria-label="demo_mann_whitney.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_mann_whitney.py">demo_mann_whitney.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-63"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_matplotlib_export.py" aria-label="demo_matplotlib_export.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_matplotlib_export.py">demo_matplotlib_export.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 
16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_matplotlib_export.py" aria-label="demo_matplotlib_export.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_matplotlib_export.py">demo_matplotlib_export.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-64"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_moral_foundations.py" aria-label="demo_moral_foundations.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_moral_foundations.py">demo_moral_foundations.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_moral_foundations.py" aria-label="demo_moral_foundations.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_moral_foundations.py">demo_moral_foundations.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-65"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 
.25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_multi_category_pca.py" aria-label="demo_multi_category_pca.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_multi_category_pca.py">demo_multi_category_pca.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_multi_category_pca.py" aria-label="demo_multi_category_pca.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_multi_category_pca.py">demo_multi_category_pca.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-66"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" 
height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_names.py" aria-label="demo_names.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_names.py">demo_names.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_names.py" aria-label="demo_names.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_names.py">demo_names.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" 
id="folder-row-67"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_nmf_topic_model.py" aria-label="demo_nmf_topic_model.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_nmf_topic_model.py">demo_nmf_topic_model.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_nmf_topic_model.py" aria-label="demo_nmf_topic_model.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_nmf_topic_model.py">demo_nmf_topic_model.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-68"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_obama.py" aria-label="demo_obama.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_obama.py">demo_obama.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_obama.py" aria-label="demo_obama.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_obama.py">demo_obama.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-69"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_output_data.py" aria-label="demo_output_data.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_output_data.py">demo_output_data.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 
0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_output_data.py" aria-label="demo_output_data.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_output_data.py">demo_output_data.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-70"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_20_newsgroups.py" aria-label="demo_pair_plot_20_newsgroups.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_20_newsgroups.py">demo_pair_plot_20_newsgroups.py</a></div></div></div></div></td><td 
class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_20_newsgroups.py" aria-label="demo_pair_plot_20_newsgroups.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_20_newsgroups.py">demo_pair_plot_20_newsgroups.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-71"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_category_focused.py" aria-label="demo_pair_plot_category_focused.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_category_focused.py">demo_pair_plot_category_focused.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_category_focused.py" aria-label="demo_pair_plot_category_focused.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_category_focused.py">demo_pair_plot_category_focused.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-72"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" 
height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention.py" aria-label="demo_pair_plot_convention.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention.py">demo_pair_plot_convention.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention.py" aria-label="demo_pair_plot_convention.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention.py">demo_pair_plot_convention.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> 
</div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-73"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention_empath.py" aria-label="demo_pair_plot_convention_empath.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention_empath.py">demo_pair_plot_convention_empath.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div 
class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention_empath.py" aria-label="demo_pair_plot_convention_empath.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention_empath.py">demo_pair_plot_convention_empath.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-74"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention_geninq.py" aria-label="demo_pair_plot_convention_geninq.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention_geninq.py">demo_pair_plot_convention_geninq.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention_geninq.py" aria-label="demo_pair_plot_convention_geninq.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention_geninq.py">demo_pair_plot_convention_geninq.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-75"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention_pacmap.py" aria-label="demo_pair_plot_convention_pacmap.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention_pacmap.py">demo_pair_plot_convention_pacmap.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_convention_pacmap.py" aria-label="demo_pair_plot_convention_pacmap.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_convention_pacmap.py">demo_pair_plot_convention_pacmap.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-76"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 
0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies.py" aria-label="demo_pair_plot_movies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies.py">demo_pair_plot_movies.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies.py" aria-label="demo_pair_plot_movies.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies.py">demo_pair_plot_movies.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-77"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" 
viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_doc2vec.py" aria-label="demo_pair_plot_movies_doc2vec.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_doc2vec.py">demo_pair_plot_movies_doc2vec.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_doc2vec.py" aria-label="demo_pair_plot_movies_doc2vec.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_doc2vec.py">demo_pair_plot_movies_doc2vec.py</a></div></div></div></div></td><td 
class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-78"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_empath.py" aria-label="demo_pair_plot_movies_empath.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_empath.py">demo_pair_plot_movies_empath.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 
.138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_empath.py" aria-label="demo_pair_plot_movies_empath.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_empath.py">demo_pair_plot_movies_empath.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-79"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_mirror.py" aria-label="demo_pair_plot_movies_mirror.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_mirror.py">demo_pair_plot_movies_mirror.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" 
fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_mirror.py" aria-label="demo_pair_plot_movies_mirror.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_mirror.py">demo_pair_plot_movies_mirror.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-80"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_mirror_simple.py" aria-label="demo_pair_plot_movies_mirror_simple.py, (File)" 
class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_mirror_simple.py">demo_pair_plot_movies_mirror_simple.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_mirror_simple.py" aria-label="demo_pair_plot_movies_mirror_simple.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_mirror_simple.py">demo_pair_plot_movies_mirror_simple.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-81"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 
1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_pacmap.py" aria-label="demo_pair_plot_movies_pacmap.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_pacmap.py">demo_pair_plot_movies_pacmap.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_pacmap.py" aria-label="demo_pair_plot_movies_pacmap.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_pacmap.py">demo_pair_plot_movies_pacmap.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-82"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div 
class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_pca.py" aria-label="demo_pair_plot_movies_pca.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_pca.py">demo_pair_plot_movies_pca.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_pca.py" aria-label="demo_pair_plot_movies_pca.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_pca.py">demo_pair_plot_movies_pca.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-83"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_phate.py" aria-label="demo_pair_plot_movies_phate.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_phate.py">demo_pair_plot_movies_phate.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 
.138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_phate.py" aria-label="demo_pair_plot_movies_phate.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_phate.py">demo_pair_plot_movies_phate.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-84"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_umap.py" aria-label="demo_pair_plot_movies_umap.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_umap.py">demo_pair_plot_movies_umap.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon 
octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pair_plot_movies_umap.py" aria-label="demo_pair_plot_movies_umap.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pair_plot_movies_umap.py">demo_pair_plot_movies_umap.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-85"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pca_documents.py" 
aria-label="demo_pca_documents.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pca_documents.py">demo_pca_documents.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pca_documents.py" aria-label="demo_pca_documents.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pca_documents.py">demo_pca_documents.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-86"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 
.138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_phate_documents.py" aria-label="demo_phate_documents.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_phate_documents.py">demo_phate_documents.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_phate_documents.py" aria-label="demo_phate_documents.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_phate_documents.py">demo_phate_documents.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-87"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" 
width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_phrase_machine.py" aria-label="demo_phrase_machine.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_phrase_machine.py">demo_phrase_machine.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_phrase_machine.py" aria-label="demo_phrase_machine.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_phrase_machine.py">demo_phrase_machine.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton 
Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-88"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_productivity.py" aria-label="demo_productivity.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_productivity.py">demo_productivity.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_productivity.py" 
aria-label="demo_productivity.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_productivity.py">demo_productivity.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-89"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pytextrank.py" aria-label="demo_pytextrank.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pytextrank.py">demo_pytextrank.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 
.138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_pytextrank.py" aria-label="demo_pytextrank.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_pytextrank.py">demo_pytextrank.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-90"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_relative_entropy.py" aria-label="demo_relative_entropy.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_relative_entropy.py">demo_relative_entropy.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" 
fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_relative_entropy.py" aria-label="demo_relative_entropy.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_relative_entropy.py">demo_relative_entropy.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-91"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_scaled_f_score.py" aria-label="demo_scaled_f_score.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_scaled_f_score.py">demo_scaled_f_score.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_scaled_f_score.py" aria-label="demo_scaled_f_score.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_scaled_f_score.py">demo_scaled_f_score.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-92"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 
1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_semiotic.py" aria-label="demo_semiotic.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_semiotic.py">demo_semiotic.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_semiotic.py" aria-label="demo_semiotic.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_semiotic.py">demo_semiotic.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-93"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path 
d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_sentence_piece.py" aria-label="demo_sentence_piece.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_sentence_piece.py">demo_sentence_piece.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_sentence_piece.py" aria-label="demo_sentence_piece.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_sentence_piece.py">demo_sentence_piece.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-94"><td 
class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_similarity.py" aria-label="demo_similarity.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_similarity.py">demo_similarity.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_similarity.py" aria-label="demo_similarity.py, (File)" class="Link--primary" 
href="/JasonKessler/scattertext/blob/master/demo_similarity.py">demo_similarity.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-95"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_simple_maths.py" aria-label="demo_simple_maths.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_simple_maths.py">demo_simple_maths.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 
4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_simple_maths.py" aria-label="demo_simple_maths.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_simple_maths.py">demo_simple_maths.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-96"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_sklearn.py" aria-label="demo_sklearn.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_sklearn.py">demo_sklearn.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" 
style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_sklearn.py" aria-label="demo_sklearn.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_sklearn.py">demo_sklearn.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-97"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_sparse.py" aria-label="demo_sparse.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_sparse.py">demo_sparse.py</a></div></div></div></div></td><td 
class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_sparse.py" aria-label="demo_sparse.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_sparse.py">demo_sparse.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-98"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div 
class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_stylistic_features.py" aria-label="demo_stylistic_features.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_stylistic_features.py">demo_stylistic_features.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_stylistic_features.py" aria-label="demo_stylistic_features.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_stylistic_features.py">demo_stylistic_features.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="react-directory-row truncate-for-mobile" id="folder-row-99"><td class="react-directory-row-name-cell-small-screen" colSpan="2"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 
1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_table.py" aria-label="demo_table.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_table.py">demo_table.py</a></div></div></div></div></td><td class="react-directory-row-name-cell-large-screen" colSpan="1"><div class="react-directory-filename-column"><svg aria-hidden="true" focusable="false" class="octicon octicon-file color-fg-muted" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M2 1.75C2 .784 2.784 0 3.75 0h6.586c.464 0 .909.184 1.237.513l2.914 2.914c.329.328.513.773.513 1.237v9.586A1.75 1.75 0 0 1 13.25 16h-9.5A1.75 1.75 0 0 1 2 14.25Zm1.75-.25a.25.25 0 0 0-.25.25v12.5c0 .138.112.25.25.25h9.5a.25.25 0 0 0 .25-.25V6h-2.75A1.75 1.75 0 0 1 9 4.25V1.5Zm6.75.062V4.25c0 .138.112.25.25.25h2.688l-.011-.013-2.914-2.914-.013-.011Z"></path></svg><div class="overflow-hidden"><div class="react-directory-filename-cell"><div class="react-directory-truncate"><a title="demo_table.py" aria-label="demo_table.py, (File)" class="Link--primary" href="/JasonKessler/scattertext/blob/master/demo_table.py">demo_table.py</a></div></div></div></div></td><td class="react-directory-row-commit-cell"><div class="Skeleton Skeleton--text"> </div></td><td><div class="Skeleton Skeleton--text"> </div></td></tr><tr class="Box-sc-g0xbh4-0 eNCcrz show-for-mobile" data-testid="view-all-files-row"><td colSpan="3" class="Box-sc-g0xbh4-0 bHTcCe"><div><button class="prc-Link-Link-85e08">View all 
files</button></div></td></tr></tbody></table></div><div class="Box-sc-g0xbh4-0 csrIcr"><div class="Box-sc-g0xbh4-0 bUQNHB"><div itemscope="" itemType="https://schema.org/abstract" class="Box-sc-g0xbh4-0 jPdcfu"><h2 class="_VisuallyHidden__VisuallyHidden-sc-11jhm7a-0 brGdpi">Repository files navigation</h2><nav class="Box-sc-g0xbh4-0 iphEWz prc-components-UnderlineWrapper-oOh5J" aria-label="Repository files"><ul class="prc-components-UnderlineItemList-b23Hf" role="list"><li class="Box-sc-g0xbh4-0 hUCRAk"><a class="prc-components-UnderlineItem-lJsg-" href="#" aria-current="page"><span data-component="icon"><svg aria-hidden="true" focusable="false" class="octicon octicon-book" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path></svg></span><span data-component="text" data-content="README">README</span></a></li><li class="Box-sc-g0xbh4-0 hUCRAk"><a class="prc-components-UnderlineItem-lJsg-" href="#"><span data-component="icon"><svg aria-hidden="true" focusable="false" class="octicon octicon-law" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 
1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path></svg></span><span data-component="text" data-content="Apache-2.0 license">Apache-2.0 license</span></a></li></ul></nav><button style="--button-color:fg.subtle" type="button" aria-label="Outline" aria-haspopup="true" aria-expanded="false" tabindex="0" class="Box-sc-g0xbh4-0 cwoBXV prc-Button-ButtonBase-c50BI" data-loading="false" data-size="medium" data-variant="invisible" aria-describedby=":Rr9ab:-loading-announcement" id=":Rr9ab:"><svg aria-hidden="true" focusable="false" class="octicon octicon-list-unordered" viewBox="0 0 16 16" width="16" height="16" fill="currentColor" display="inline-block" overflow="visible" style="vertical-align:text-bottom"><path d="M5.75 2.5h8.5a.75.75 0 0 1 0 1.5h-8.5a.75.75 0 0 1 0-1.5Zm0 5h8.5a.75.75 0 0 1 0 1.5h-8.5a.75.75 0 0 1 0-1.5Zm0 5h8.5a.75.75 0 0 1 0 1.5h-8.5a.75.75 0 0 1 0-1.5ZM2 14a1 1 0 1 1 0-2 1 1 0 0 1 0 2Zm1-6a1 1 0 1 1-2 0 1 1 0 0 1 2 0ZM2 4a1 1 0 1 1 0-2 1 1 0 0 1 0 2Z"></path></svg></button></div><div class="Box-sc-g0xbh4-0 QkQOb js-snippet-clipboard-copy-unpositioned undefined" data-hpc="true"><article class="markdown-body entry-content container-lg" 
itemprop="text"><p dir="auto"><a href="https://travis-ci.org/JasonKessler/scattertext" rel="nofollow"><img src="https://camo.githubusercontent.com/43d5dfb9a18061b821518d69fc053716441e4db83c58cd0fdb484f70b9c3cd36/68747470733a2f2f7472617669732d63692e6f72672f4a61736f6e4b6573736c65722f73636174746572746578742e7376673f6272616e63683d6d6173746572" alt="Build Status" data-canonical-src="https://travis-ci.org/JasonKessler/scattertext.svg?branch=master" style="max-width: 100%;"></a> <a href="/JasonKessler/scattertext/blob/master"><img src="https://camo.githubusercontent.com/0ed60cb8431e3b88329d69dbcc8dbc8ca9d527539aa1e9b6521536e010a83411/68747470733a2f2f696d672e736869656c64732e696f2f707970692f762f73636174746572746578742e737667" alt="PyPI" data-canonical-src="https://img.shields.io/pypi/v/scattertext.svg" style="max-width: 100%;"></a> <a href="https://gitter.im/scattertext/Lobby" rel="nofollow"><img src="https://camo.githubusercontent.com/d49b4fed41cfa2b8dd68ac724db9b926dec8e7f130d6b9fde8fb069ce5abd344/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f4749545445522d6a6f696e253230636861742d677265656e2e737667" alt="Gitter Chat" data-canonical-src="https://img.shields.io/badge/GITTER-join%20chat-green.svg" style="max-width: 100%;"></a> <a href="https://twitter.com/jasonkessler" rel="nofollow"><img src="https://camo.githubusercontent.com/9e10b788a9284c3bc5aee5831d13ba840f400b5eae820b5854e62b615e304bd4/68747470733a2f2f696d672e736869656c64732e696f2f747769747465722f666f6c6c6f772f657370616472696e652e7376673f7374796c653d736f6369616c266c6162656c3d466f6c6c6f77" alt="Twitter Follow" data-canonical-src="https://img.shields.io/twitter/follow/espadrine.svg?style=social&amp;label=Follow" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h1 tabindex="-1" class="heading-element" dir="auto">Scattertext 0.2.2</h1><a id="user-content-scattertext-022" class="anchor" aria-label="Permalink: Scattertext 0.2.2" href="#scattertext-022"><svg class="octicon octicon-link" 
viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">A tool for finding distinguishing terms in corpora and displaying them in an interactive HTML scatter plot. Points corresponding to terms are selectively labeled so that they don't overlap with other labels or points.</p> <p dir="auto">Cite as: Jason S. Kessler. Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ. ACL System Demonstrations. 2017.</p> <p dir="auto">Below is an example of using Scattertext to visualize terms used in the 2012 American political conventions. The 2,000 most party-associated unigrams are displayed as points in the scatter plot. 
Their x- and y- axes are the dense ranks of their usage by Republican and Democratic speakers respectively.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="import scattertext as st df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ) corpus = st.CorpusFromParsedDocuments( df, category_col='party', parsed_col='parse' ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) html = st.produce_scattertext_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, metadata=corpus.get_df()['speaker'], transform=st.Scalers.dense_rank, include_gradient=True, left_gradient_term='More Republican', middle_gradient_term='Metric: Dense Rank Difference', right_gradient_term='More Democratic', ) open('./demo_compact.html', 'w').write(html)"><pre lang="pydocstring" class="notranslate"><code>import scattertext as st df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ) corpus = st.CorpusFromParsedDocuments( df, category_col='party', parsed_col='parse' ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) html = st.produce_scattertext_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, metadata=corpus.get_df()['speaker'], transform=st.Scalers.dense_rank, include_gradient=True, left_gradient_term='More Republican', middle_gradient_term='Metric: Dense Rank Difference', right_gradient_term='More Democratic', ) open('./demo_compact.html', 'w').write(html) </code></pre></div> <p dir="auto">The HTML file written would look like the image below. Click on it for the actual interactive visualization. 
<a href="https://jasonkessler.github.io/demo_compact.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_compact.png" alt="demo_compact.html" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Citation</h2><a id="user-content-citation" class="anchor" aria-label="Permalink: Citation" href="#citation"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Jason S. Kessler. Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ. ACL System Demonstrations. 2017. 
Link to paper: <a href="https://arxiv.org/abs/1703.00565" rel="nofollow">arxiv.org/abs/1703.00565</a></p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="@article{kessler2017scattertext, author = {Kessler, Jason S.}, title = {Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ}, booktitle = {Proceedings of ACL-2017 System Demonstrations}, year = {2017}, address = {Vancouver, Canada}, publisher = {Association for Computational Linguistics}, }"><pre class="notranslate"><code>@article{kessler2017scattertext, author = {Kessler, Jason S.}, title = {Scattertext: a Browser-Based Tool for Visualizing how Corpora Differ}, booktitle = {Proceedings of ACL-2017 System Demonstrations}, year = {2017}, address = {Vancouver, Canada}, publisher = {Association for Computational Linguistics}, } </code></pre></div> <p dir="auto"><strong>Table of Contents</strong></p> <ul dir="auto"> <li> <p dir="auto"><a href="#installation">Installation</a></p> </li> <li> <p dir="auto"><a href="#overview">Overview</a></p> </li> <li> <p dir="auto"><a href="#customizing-the-visualization-and-plotting-dispersion">Customizing the Visualization and Plotting Dispersion</a></p> </li> <li> <p dir="auto"><a href="#tutorial">Tutorial</a></p> <ul dir="auto"> <li><a href="#help-i-dont-know-python-but-i-still-want-to-use-scattertext">Help! 
I don't know Python but I still want to use Scattertext</a></li> <li><a href="#using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations">Using Scattertext as a text analysis library: finding characteristic terms and their associations</a></li> <li><a href="#visualizing-term-associations">Visualizing term associations</a></li> <li><a href="#visualizing-phrase-associations">Visualizing phrase associations</a></li> <li><a href="#adding-color-gradients-to-explain-scores">Adding color gradients to explain scores</a></li> <li><a href="#visualizing-empath-topics-and-categories">Visualizing Empath topics and categories</a></li> <li><a href="#visualizing-the-moral-foundations-20-dictionary">Visualizing the Moral Foundations 2.0 Dictionary</a></li> <li><a href="#ordering-terms-by-corpus-characteristicness">Ordering Terms by Corpus Characteristicness</a></li> <li><a href="#document-based-scatterplots">Document-Based Scatterplots</a></li> <li><a href="#using-cohens-d-or-hedges-g-to-visualize-effect-size">Using Cohen's d or Hedges' g to visualize effect size</a></li> <li><a href="#using-cliffs-delta-to-visualize-effect-size">Using Cliff's Delta to visualize effect size</a></li> <li><a href="#using-bi-normal-separation-bns-to-score-terms">Using Bi-Normal Separation (BNS) to score terms</a></li> <li><a href="#using-correlations-to-explain-classifiers">Using correlations to explain classifiers</a></li> <li><a href="#using-custom-background-word-frequencies">Using Custom Background Word Frequencies</a></li> <li><a href="#plotting-word-productivity">Plotting word productivity</a></li> </ul> </li> <li> <p dir="auto"><a href="#understanding-scaled-f-score">Understanding Scaled F-Score</a></p> </li> <li> <p dir="auto"><a href="#alternative-term-scoring-methods">Alternative term scoring methods</a></p> </li> <li> <p dir="auto"><a href="#the-position-select-plot-process">The position-select-plot process</a></p> </li> <li> <p dir="auto"><a 
href="#advanced-uses">Advanced Uses</a></p> <ul dir="auto"> <li><a href="#visualizing-differences-based-on-only-term-frequencies">Visualizing differences based on only term frequencies</a></li> <li><a href="#visualizing-query-based-categorical-differences">Visualizing query-based categorical differences</a></li> <li><a href="#visualizing-any-kind-of-term-score">Visualizing any kind of term score</a></li> <li><a href="#custom-term-positions">Custom term positions</a></li> <li><a href="#emoji-analysis">Emoji analysis</a></li> <li><a href="#visualizing-sentencepiece-tokens">Visualizing SentencePiece tokens</a></li> <li><a href="#visualizing-scikit-learn-text-classification-weights">Visualizing scikit-learn text classification weights</a></li> <li><a href="#creating-lexicalized-semiotic-squares">Creating lexicalized semiotic squares</a></li> <li><a href="#visualizing-topic-models">Visualizing topic models</a></li> <li><a href="#creating-T-SNE-style-word-embedding-projection-plots">Creating T-SNE-style word embedding projection plots</a></li> <li><a href="#using-svd-to-visualize-any-kind-of-word-embeddings">Using SVD to visualize any kind of word embeddings</a></li> <li><a href="#exporting-plot-to-matplotlib">Exporting plot to matplotlib</a></li> <li><a href="#using-the-same-scale-for-both-axes">Using the same scale for both axes</a></li> </ul> </li> <li> <p dir="auto"><a href="#examples">Examples</a></p> </li> <li> <p dir="auto"><a href="#a-note-on-chart-layout">A note on chart layout</a></p> </li> <li> <p dir="auto"><a href="#whats-new">What's new</a></p> </li> <li> <p dir="auto"><a href="#sources">Sources</a></p> </li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Installation</h2><a id="user-content-installation" class="anchor" aria-label="Permalink: Installation" href="#installation"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 
3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Install Python 3.11 or higher and run:</p> <p dir="auto"><code>$ pip install scattertext</code></p> <p dir="auto">If you cannot (or don't want to) install spaCy, substitute <code>nlp = spacy.load('en')</code> lines with <code>nlp = scattertext.WhitespaceNLP.whitespace_nlp</code>. Note, this is not compatible with <code>word_similarity_explorer</code>, and the tokenization and sentence boundary detection capabilities will be low-performance regular expressions. 
See <code>demo_without_spacy.py</code> for an example.</p> <p dir="auto">It is recommended you install <code>jieba</code>, <code>spacy</code>, <code>empath</code>, <code>astropy</code>, <code>flashtext</code>, <code>gensim</code> and <code>umap-learn</code> in order to take full advantage of Scattertext.</p> <p dir="auto">Scattertext should mostly work with Python 2.7, but it may not.</p> <p dir="auto">The HTML outputs look best in Chrome and Safari.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Style Guide</h2><a id="user-content-style-guide" class="anchor" aria-label="Permalink: Style Guide" href="#style-guide"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The name of this project is Scattertext. "Scattertext" is written as a single word and should be capitalized. 
When used in Python, the package <code>scattertext</code> should be imported under the name <code>st</code>, i.e., <code>import scattertext as st</code>.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Overview</h2><a id="user-content-overview" class="anchor" aria-label="Permalink: Overview" href="#overview"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">This is a tool that's intended for visualizing what words and phrases are more characteristic of a category than others.</p> <p dir="auto">Consider the example at the top of the page.</p> <p dir="auto">Looking at this may seem overwhelming. In fact, it's a relatively simple visualization of word use during the 2012 political conventions. Each dot corresponds to a word or phrase mentioned by Republicans or Democrats during their conventions. The closer a dot is to the top of the plot, the more frequently it was used by Democrats. The further right a dot is, the more that word or phrase was used by Republicans. Words frequently used by both parties, like "of" and "the" and even "Mitt" tend to occur in the upper-right-hand corner. 
Although very low frequency words have been hidden to preserve computing resources, a word that neither party used, like "giraffe" would be in the bottom-left-hand corner.</p> <p dir="auto">The interesting things happen close to the upper-left and lower-right corners. In the upper-left corner, words like "auto" (as in auto bailout) and "millionaires" are frequently used by Democrats but infrequently or never used by Republicans. Likewise, terms frequently used by Republicans and infrequently by Democrats occupy the bottom-right corner. These include "big government" and "olympics", referring to the Salt Lake City Olympics in which Gov. Romney was involved.</p> <p dir="auto">Terms are colored by their association. Those that are more associated with Democrats are blue, and those more associated with Republicans are red.</p> <p dir="auto">Terms that are most characteristic of both sets of documents are displayed on the far-right of the visualization.</p> <p dir="auto">The inspiration for this visualization came from Dataclysm (Rudder, 2014).</p> <p dir="auto">Scattertext is designed to help you build these graphs and efficiently label points on them.</p> <p dir="auto">The documentation (including this readme) is a work in progress. 
Please see the tutorial below as well as the <a href="https://github.com/JasonKessler/Scattertext-PyData">PyData 2017 Tutorial</a>.</p> <p dir="auto">Poking around the code and tests should give you a good idea of how things work.</p> <p dir="auto">The library covers some novel and effective term-importance formulas, including <strong>Scaled F-Score</strong>.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Customizing the Visualization and Plotting Dispersion</h2><a id="user-content-customizing-the-visualization-and-plotting-dispersion" class="anchor" aria-label="Permalink: Customizing the Visualization and Plotting Dispersion" href="#customizing-the-visualization-and-plotting-dispersion"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">New in Scattertext 0.1.0, one can use a dataframe for term/metadata positions and other term-specific data. We can also use it to determine term-specific information which is shown after a term is clicked.</p> <p dir="auto">Note that it is possible to disable the use of document categories in Scattertext, as we shall see in this example.</p> <p dir="auto">This example covers plotting term dispersion against word frequency and identifying the terms which are most and least dispersed given their frequencies. 
Using the Rosengren's S dispersion measure (Gries 2021), terms tend to increase in their dispersion scores as they get more frequent. We'll see how we can both plot this effect and factor out the effect of frequency.</p> <p dir="auto">This, along with a number of other dispersion metrics presented in Gries (2021), are available and documented in the <code>Dispersion</code> class, which we'll use later in the section.</p> <p dir="auto">Let's start by creating a Convention corpus, but we'll use the <code>CorpusWithoutCategoriesFromParsedDocuments</code> factory to ensure that no categories are included in the corpus. If we try to find document categories, we'll see that all documents have the category '_'.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import scattertext as st df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences)) corpus = st.CorpusWithoutCategoriesFromParsedDocuments( df, parsed_col='parse' ).build().get_unigram_corpus().remove_infrequent_words(minimum_term_count=6) corpus.get_categories() # Returns ['_']"><pre><span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>().<span class="pl-c1">assign</span>( <span class="pl-s1">parse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>)) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span 
class="pl-c1">CorpusWithoutCategoriesFromParsedDocuments</span>( <span class="pl-s1">df</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span> ).<span class="pl-c1">build</span>().<span class="pl-c1">get_unigram_corpus</span>().<span class="pl-c1">remove_infrequent_words</span>(<span class="pl-s1">minimum_term_count</span><span class="pl-c1">=</span><span class="pl-c1">6</span>) <span class="pl-s1">corpus</span>.<span class="pl-c1">get_categories</span>() <span class="pl-c"># Returns ['_']</span></pre></div> <p dir="auto">Next, we'll create a dataframe for all terms we'll plot. We'll just start by creating a dataframe where we capture the frequency of each term and various dispersion metrics. These will be shown after a term is activated in the plot.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="dispersion = st.Dispersion(corpus) dispersion_df = dispersion.get_df() dispersion_df.head(3)"><pre><span class="pl-s1">dispersion</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">Dispersion</span>(<span class="pl-s1">corpus</span>) <span class="pl-s1">dispersion_df</span> <span class="pl-c1">=</span> <span class="pl-s1">dispersion</span>.<span class="pl-c1">get_df</span>() <span class="pl-s1">dispersion_df</span>.<span class="pl-c1">head</span>(<span class="pl-c1">3</span>)</pre></div> <p dir="auto">Which returns</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content=" Frequency Range SD VC Juilland's D Rosengren's S DP DP norm KL-divergence Dissemination thank 363 134 3.108113 1.618274 0.707416 0.694898 0.391548 0.391560 0.748808 0.972954 you 1630 177 12.383708 1.435902 0.888596 0.898805 0.233627 0.233635 0.263337 0.963905 so 549 155 3.523380 1.212967 0.774299 0.822244 0.283151 0.283160 0.411750 0.986423``` These are 
discussed in detail in [Gries 2021](http://www.stgries.info/research/ToApp_STG_Dispersion_PHCL.pdf). Dissemination is presented in Altmann et al. (2011). We'll use Rosengren's S to find the dispersion of each term. It's a metric designed for corpus parts (convention speeches in our case) of varying length. Where n is the number of documents in the corpus, s_i is the percentage of tokens in the corpus found in document i, v_i is term count in document i, and f is the total number of tokens in the corpus of the term's type. Rosengren's S: [![Rosengren's S](https://render.githubusercontent.com/render/math?math=\frac{(\sum_{i=1}^{n}\sqrt{s_i%20\cdot%20v_i})^2}{f})](https://render.githubusercontent.com/render/math?math=\frac{(\sum_{i=1}^{n}\sqrt{s_i%20\cdot%20v_i})^2}{f}) In order to start plotting, we'll need to add coordinates for each term to the data frame. To use the `dataframe_scattertext` function, you need, at a minimum, a dataframe with 'X' and 'Y' columns. The `Xpos` and `Ypos` columns indicate the positions of the original `X` and `Y` values on the scatterplot, and need to be between 0 and 1. Functions in `st.Scalers` perform this scaling. Absent `Xpos` or `Ypos`, `st.Scalers.scale` would be used. Here is a sample of values: * `st.Scalers.scale(vec)` Rescales the vector to where the minimum value is 0 and the maximum is 1. * `st.Scalers.log_scale(vec)` Rescales the log of the vector * `st.Scalers.dense_rank(vec)` Rescales the dense rank of the vector * `st.Scalers.scale_center_zero_abs(vec)` Rescales a vector with both positive and negative values such that the 0 value in the original vector is plotted at 0.5, negative values are projected from [-max(abs(vec)), 0] to [0, 0.5] and positive values projected from [0, max(abs(vec))] to [0.5, 1]. 
```python dispersion_df = dispersion_df.assign( X=lambda df: df.Frequency, Xpos=lambda df: st.Scalers.log_scale(df.X), Y=lambda df: df[&quot;Rosengren's S&quot;], Ypos=lambda df: st.Scalers.scale(df.Y), )"><pre class="notranslate"><code> Frequency Range SD VC Juilland's D Rosengren's S DP DP norm KL-divergence Dissemination thank 363 134 3.108113 1.618274 0.707416 0.694898 0.391548 0.391560 0.748808 0.972954 you 1630 177 12.383708 1.435902 0.888596 0.898805 0.233627 0.233635 0.263337 0.963905 so 549 155 3.523380 1.212967 0.774299 0.822244 0.283151 0.283160 0.411750 0.986423``` These are discussed in detail in [Gries 2021](http://www.stgries.info/research/ToApp_STG_Dispersion_PHCL.pdf). Dissemination is presented in Altmann et al. (2011). We'll use Rosengren's S to find the dispersion of each term. It's a metric designed for corpus parts (convention speeches in our case) of varying length. Where n is the number of documents in the corpus, s_i is the percentage of tokens in the corpus found in document i, v_i is term count in document i, and f is the total number of tokens in the corpus of the term's type. Rosengren's S: [![Rosengren's S](https://render.githubusercontent.com/render/math?math=\frac{(\sum_{i=1}^{n}\sqrt{s_i%20\cdot%20v_i})^2}{f})](https://render.githubusercontent.com/render/math?math=\frac{(\sum_{i=1}^{n}\sqrt{s_i%20\cdot%20v_i})^2}{f}) In order to start plotting, we'll need to add coordinates for each term to the data frame. To use the `dataframe_scattertext` function, you need, at a minimum, a dataframe with 'X' and 'Y' columns. The `Xpos` and `Ypos` columns indicate the positions of the original `X` and `Y` values on the scatterplot, and need to be between 0 and 1. Functions in `st.Scalers` perform this scaling. Absent `Xpos` or `Ypos`, `st.Scalers.scale` would be used. Here is a sample of values: * `st.Scalers.scale(vec)` Rescales the vector to where the minimum value is 0 and the maximum is 1. 
* `st.Scalers.log_scale(vec)` Rescales the log of the vector * `st.Scalers.dense_rank(vec)` Rescales the dense rank of the vector * `st.Scalers.scale_center_zero_abs(vec)` Rescales a vector with both positive and negative values such that the 0 value in the original vector is plotted at 0.5, negative values are projected from [-max(abs(vec)), 0] to [0, 0.5] and positive values projected from [0, max(abs(vec))] to [0.5, 1]. ```python dispersion_df = dispersion_df.assign( X=lambda df: df.Frequency, Xpos=lambda df: st.Scalers.log_scale(df.X), Y=lambda df: df["Rosengren's S"], Ypos=lambda df: st.Scalers.scale(df.Y), ) </code></pre></div> <p dir="auto">Note that the <code>Ypos</code> column here is not necessary since <code>Y</code> would automatically be scaled.</p> <p dir="auto">Finally, since we are not distinguishing between categories, we can set <code>ignore_categories=True</code>.</p> <p dir="auto">We can now plot this graph using the <code>dataframe_scattertext</code> function:</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.dataframe_scattertext( corpus, plot_df=dispersion_df, metadata=corpus.get_df()['speaker'] + ' (' + corpus.get_df()['party'].str.upper() + ')', ignore_categories=True, x_label='Log Frequency', y_label=&quot;Rosengren's S&quot;, y_axis_labels=['Less Dispersion', 'Medium', 'More Dispersion'], )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">dataframe_scattertext</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">plot_df</span><span class="pl-c1">=</span><span class="pl-s1">dispersion_df</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>] <span class="pl-c1">+</span> <span class="pl-s">' ('</span> <span class="pl-c1">+</span> <span 
class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'party'</span>].<span class="pl-c1">str</span>.<span class="pl-c1">upper</span>() <span class="pl-c1">+</span> <span class="pl-s">')'</span>, <span class="pl-s1">ignore_categories</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Log Frequency'</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">"Rosengren's S"</span>, <span class="pl-s1">y_axis_labels</span><span class="pl-c1">=</span>[<span class="pl-s">'Less Dispersion'</span>, <span class="pl-s">'Medium'</span>, <span class="pl-s">'More Dispersion'</span>], )</pre></div> <p dir="auto">Which yields (click for an interactive version): <a href="https://jasonkessler.github.io/dispersion-basic.html" rel="nofollow"><img src="https://camo.githubusercontent.com/e2887ad2d16e5d3719ab4e6f01714fd7022005564847c9a929185b7549de7b79/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64697370657273696f6e2d62617369632e706e67" alt="dispersion-basic.html" data-canonical-src="https://jasonkessler.github.io/dispersion-basic.png" style="max-width: 100%;"></a></p> <p dir="auto">Note that we can see various dispersion statistics under a term's name, in addition to the standard usage statistics. To customize the statistics which are displayed, set the <code>term_description_column=[...]</code> parameter with a list of column names to be displayed.</p> <p dir="auto">One issue in this dispersion chart, which tends to be common to dispersion metrics in general, is that dispersion and frequency tend to have a high correlation, but with a complex, non-linear curve. 
Depending on the metric, this correlation curve could be power, linear, sigmoidal, or typically, something else.</p> <p dir="auto">In order to factor out this correlation, we can predict the dispersion from frequency using a non-parametric regressor, and see which terms have the highest and lowest residuals with respect to their expected dispersions based on their frequencies.</p> <p dir="auto">In this case, we'll use a KNN regressor with 10 neighbors to predict Rosengren's S (<code>dispersion_df.Y</code>) from term frequencies (<code>dispersion_df.X</code>), and compute the residual.</p> <p dir="auto">We'll use the residual to color points, with a neutral color for residuals around 0 and other colors for positive and negative values. We'll add a column in the data frame for point colors, and call it ColorScore. It is populated with values between 0 and 1, with 0.5 as a neutral color on the <code>d3 interpolateWarm</code> color scale. We use <code>st.Scalers.scale_center_zero_abs</code>, discussed above, to make this transformation.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="from sklearn.neighbors import KNeighborsRegressor dispersion_df = dispersion_df.assign( Expected=lambda df: KNeighborsRegressor(n_neighbors=10).fit( df.X.values.reshape(-1, 1), df.Y ).predict(df.X.values.reshape(-1, 1)), Residual=lambda df: df.Y - df.Expected, ColorScore=lambda df: st.Scalers.scale_center_zero_abs(df.Residual) ) "><pre><span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">neighbors</span> <span class="pl-k">import</span> <span class="pl-v">KNeighborsRegressor</span> <span class="pl-s1">dispersion_df</span> <span class="pl-c1">=</span> <span class="pl-s1">dispersion_df</span>.<span class="pl-c1">assign</span>( <span class="pl-v">Expected</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span 
class="pl-en">KNeighborsRegressor</span>(<span class="pl-s1">n_neighbors</span><span class="pl-c1">=</span><span class="pl-c1">10</span>).<span class="pl-c1">fit</span>( <span class="pl-s1">df</span>.<span class="pl-c1">X</span>.<span class="pl-c1">values</span>.<span class="pl-c1">reshape</span>(<span class="pl-c1">-</span><span class="pl-c1">1</span>, <span class="pl-c1">1</span>), <span class="pl-s1">df</span>.<span class="pl-c1">Y</span> ).<span class="pl-c1">predict</span>(<span class="pl-s1">df</span>.<span class="pl-c1">X</span>.<span class="pl-c1">values</span>.<span class="pl-c1">reshape</span>(<span class="pl-c1">-</span><span class="pl-c1">1</span>, <span class="pl-c1">1</span>)), <span class="pl-v">Residual</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">Y</span> <span class="pl-c1">-</span> <span class="pl-s1">df</span>.<span class="pl-c1">Expected</span>, <span class="pl-v">ColorScore</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">scale_center_zero_abs</span>(<span class="pl-s1">df</span>.<span class="pl-c1">Residual</span>) ) </pre></div> <p dir="auto">Now we are ready to plot our colored dispersion chart. We assign the ColorScore column name to the <code>color_score_column</code> parameter in <code>dataframe_scattertext</code>.</p> <p dir="auto">Additionally, We'd like to populate the two term lists on the left with terms that have high and low residual values, indicating terms which have the most dispersion relative to their frequency-expected level and the lowest. We can do this by the <code>left_list_column</code> parameter. We can specify the upper and lower term list names using the <code>header_names</code> parameter. 
Finally, we can spiff-up the plot by adding an appealing background color.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.dataframe_scattertext( corpus, plot_df=dispersion_df, metadata=corpus.get_df()['speaker'] + ' (' + corpus.get_df()['party'].str.upper() + ')', ignore_categories=True, x_label='Log Frequency', y_label=&quot;Rosengren's S&quot;, y_axis_labels=['Less Dispersion', 'Medium', 'More Dispersion'], color_score_column='ColorScore', header_names={'upper': 'Lower than Expected', 'lower': 'More than Expected'}, left_list_column='Residual', background_color='#e5e5e3' )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">dataframe_scattertext</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">plot_df</span><span class="pl-c1">=</span><span class="pl-s1">dispersion_df</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>] <span class="pl-c1">+</span> <span class="pl-s">' ('</span> <span class="pl-c1">+</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'party'</span>].<span class="pl-c1">str</span>.<span class="pl-c1">upper</span>() <span class="pl-c1">+</span> <span class="pl-s">')'</span>, <span class="pl-s1">ignore_categories</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Log Frequency'</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">"Rosengren's S"</span>, <span class="pl-s1">y_axis_labels</span><span class="pl-c1">=</span>[<span class="pl-s">'Less Dispersion'</span>, <span class="pl-s">'Medium'</span>, <span class="pl-s">'More Dispersion'</span>], <span 
class="pl-s1">color_score_column</span><span class="pl-c1">=</span><span class="pl-s">'ColorScore'</span>, <span class="pl-s1">header_names</span><span class="pl-c1">=</span>{<span class="pl-s">'upper'</span>: <span class="pl-s">'Lower than Expected'</span>, <span class="pl-s">'lower'</span>: <span class="pl-s">'More than Expected'</span>}, <span class="pl-s1">left_list_column</span><span class="pl-c1">=</span><span class="pl-s">'Residual'</span>, <span class="pl-s1">background_color</span><span class="pl-c1">=</span><span class="pl-s">'#e5e5e3'</span> )</pre></div> <p dir="auto">Which yields (click for an interactive version): <a href="https://jasonkessler.github.io/dispersion-residual.html" rel="nofollow"><img src="https://camo.githubusercontent.com/1a4dacac9740785f890c344aa530d156789cf067a4f3c3d359458e42d03e2e1a/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64697370657273696f6e2d726573696475616c2e706e67" alt="dispersion-residual.html" data-canonical-src="https://jasonkessler.github.io/dispersion-residual.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Tutorial</h2><a id="user-content-tutorial" class="anchor" aria-label="Permalink: Tutorial" href="#tutorial"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <div class="markdown-heading" dir="auto"><h3 
tabindex="-1" class="heading-element" dir="auto">Help! I don't know Python but I still want to use Scattertext.</h3><a id="user-content-help-i-dont-know-python-but-i-still-want-to-use-scattertext" class="anchor" aria-label="Permalink: Help! I don't know Python but I still want to use Scattertext." href="#help-i-dont-know-python-but-i-still-want-to-use-scattertext"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">While you should learn Python to fully use Scattertext, I've put some of the basic functionality in a commandline tool. The tool is installed when you follow the procedure laid out above.</p> <p dir="auto">Run <code>$ scattertext --help</code> from the commandline to see the full usage information. Here's a quick example of how to use vanilla Scattertext on a CSV file. The file needs to have at least two columns, one containing the text to be analyzed, and another containing the category. In the example CSV below, the columns are text and party, respectively.</p> <p dir="auto">The example below processes the CSV file, and writes the resulting HTML visualization into cli_demo.html.</p> <p dir="auto">Note, the parameter <code>--minimum_term_frequency=8</code> omits terms that occur fewer than 8 times, and <code>--regex_parser</code> indicates a simple regular expression parser should be used in place of spaCy. 
The flag <code>--one_use_per_doc</code> indicates that term frequency should be calculated by only counting no more than one occurrence of a term in a document.</p> <p dir="auto">If you'd like to parse non-English text, you can use the <code>--spacy_language_model</code> argument to configure which spaCy language model the tool will use. The default is 'en' and you can see the others available at <a href="https://spacy.io/docs/api/language-models" rel="nofollow">https://spacy.io/docs/api/language-models</a>.</p> <div class="highlight highlight-source-shell notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="$ curl -s https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv | head -2 party,speaker,text democrat,BARACK OBAMA,&quot;Thank you. Thank you. Thank you. Thank you so much.Thank you.Thank you so much. Thank you. Thank you very much, everybody. Thank you. $ $ scattertext --datafile=https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv \ &gt; --text_column=text --category_column=party --metadata_column=speaker --positive_category=democrat \ &gt; --category_display_name=Democratic --not_category_display_name=Republican --minimum_term_frequency=8 \ &gt; --one_use_per_doc --regex_parser --outputfile=cli_demo.html"><pre>$ curl -s https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv <span class="pl-k">|</span> head -2 party,speaker,text democrat,BARACK OBAMA,<span class="pl-s"><span class="pl-pds">"</span>Thank you. Thank you. Thank you. Thank you so much.Thank you.Thank you so much. Thank you. Thank you very much, everybody. 
Thank you.</span> <span class="pl-s">$</span> <span class="pl-s">$ scattertext --datafile=https://cdn.rawgit.com/JasonKessler/scattertext/master/scattertext/data/political_data.csv <span class="pl-cce">\</span></span> <span class="pl-s">&gt; --text_column=text --category_column=party --metadata_column=speaker --positive_category=democrat <span class="pl-cce">\</span></span> <span class="pl-s">&gt; --category_display_name=Democratic --not_category_display_name=Republican --minimum_term_frequency=8 <span class="pl-cce">\</span></span> <span class="pl-s">&gt; --one_use_per_doc --regex_parser --outputfile=cli_demo.html</span></pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using Scattertext as a text analysis library: finding characteristic terms and their associations</h3><a id="user-content-using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations" class="anchor" aria-label="Permalink: Using Scattertext as a text analysis library: finding characteristic terms and their associations" href="#using-scattertext-as-a-text-analysis-library-finding-characteristic-terms-and-their-associations"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The following code creates a stand-alone HTML file that analyzes words used by Democrats and Republicans in the 2012 
party conventions, and outputs some notable term associations.</p> <p dir="auto">First, import Scattertext and spaCy.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; import scattertext as st &gt;&gt;&gt; import spacy &gt;&gt;&gt; from pprint import pprint"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; import scattertext as st &gt;&gt;&gt; import spacy &gt;&gt;&gt; from pprint import pprint </code></pre></div> <p dir="auto">Next, assemble the data you want to analyze into a Pandas data frame. It should have at least two columns, the text you'd like to analyze, and the category you'd like to study. Here, the <code>text</code> column contains convention speeches while the <code>party</code> column contains the party of the speaker. We'll eventually use the <code>speaker</code> column to label snippets in the visualization.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() &gt;&gt;&gt; convention_df.iloc[0] party democrat speaker BARACK OBAMA text Thank you. Thank you. Thank you. Thank you so ... Name: 0, dtype: object"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() &gt;&gt;&gt; convention_df.iloc[0] party democrat speaker BARACK OBAMA text Thank you. Thank you. Thank you. Thank you so ... Name: 0, dtype: object </code></pre></div> <p dir="auto">Turn the data frame into a Scattertext Corpus to begin analyzing it. To look for differences in parties, set the <code>category_col</code> parameter to <code>'party'</code>, and use the speeches, present in the <code>text</code> column, as the texts to analyze by setting the <code>text</code> col parameter. 
Finally, pass a spaCy model in to the <code>nlp</code> argument and call <code>build()</code> to construct the corpus.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="# Turn it into a Scattertext Corpus &gt;&gt;&gt; nlp = spacy.load('en') &gt;&gt;&gt; corpus = st.CorpusFromPandas(convention_df, ... category_col='party', ... text_col='text', ... nlp=nlp).build()"><pre lang="pydocstring" class="notranslate"><code># Turn it into a Scattertext Corpus &gt;&gt;&gt; nlp = spacy.load('en') &gt;&gt;&gt; corpus = st.CorpusFromPandas(convention_df, ... category_col='party', ... text_col='text', ... nlp=nlp).build() </code></pre></div> <p dir="auto">Let's see characteristic terms in the corpus, and terms that are most associated with Democrats and Republicans. See slides <a href="http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/52" rel="nofollow">52</a> to <a href="http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/59" rel="nofollow">59</a> of the <a href="http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/" rel="nofollow">Turning Unstructured Content into Kernels of Ideas</a> talk for more details on these approaches.</p> <p dir="auto">Here are the terms that differentiate the corpus from a general English corpus.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; print(list(corpus.get_scaled_f_scores_vs_background().index[:10])) ['obama', 'romney', 'barack', 'mitt', 'obamacare', 'biden', 'romneys', 'hardworking', 'bailouts', 'autoworkers']"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; print(list(corpus.get_scaled_f_scores_vs_background().index[:10])) ['obama', 'romney', 'barack', 'mitt', 'obamacare', 'biden', 'romneys', 'hardworking', 'bailouts', 'autoworkers'] </code></pre></div> <p 
dir="auto">Here are the terms that are most associated with Democrats:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; term_freq_df = corpus.get_term_freq_df() &gt;&gt;&gt; term_freq_df['Democratic Score'] = corpus.get_scaled_f_scores('democrat') &gt;&gt;&gt; pprint(list(term_freq_df.sort_values(by='Democratic Score', ascending=False).index[:10])) ['auto', 'america forward', 'auto industry', 'insurance companies', 'pell', 'last week', 'pell grants', &quot;women 's&quot;, 'platform', 'millionaires']"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; term_freq_df = corpus.get_term_freq_df() &gt;&gt;&gt; term_freq_df['Democratic Score'] = corpus.get_scaled_f_scores('democrat') &gt;&gt;&gt; pprint(list(term_freq_df.sort_values(by='Democratic Score', ascending=False).index[:10])) ['auto', 'america forward', 'auto industry', 'insurance companies', 'pell', 'last week', 'pell grants', "women 's", 'platform', 'millionaires'] </code></pre></div> <p dir="auto">And Republicans:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; term_freq_df['Republican Score'] = corpus.get_scaled_f_scores('republican') &gt;&gt;&gt; pprint(list(term_freq_df.sort_values(by='Republican Score', ascending=False).index[:10])) ['big government', &quot;n't build&quot;, 'mitt was', 'the constitution', 'he wanted', 'hands that', 'of mitt', '16 trillion', 'turned around', 'in florida']"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; term_freq_df['Republican Score'] = corpus.get_scaled_f_scores('republican') &gt;&gt;&gt; pprint(list(term_freq_df.sort_values(by='Republican Score', ascending=False).index[:10])) ['big government', "n't build", 'mitt was', 'the constitution', 'he wanted', 'hands that', 'of mitt', '16 trillion', 'turned around', 'in florida'] </code></pre></div> <div class="markdown-heading" 
dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing term associations</h3><a id="user-content-visualizing-term-associations" class="anchor" aria-label="Permalink: Visualizing term associations" href="#visualizing-term-associations"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Now, let's write the scatter plot to a stand-alone HTML file. We'll make the y-axis category "democrat", and name the category "Democratic" with a capital "D" for presentation purposes. We'll name the other category "Republican" with a capital "R". All documents in the corpus without the category "democrat" will be considered Republican. We set the width of the visualization in pixels, and label each excerpt with the speaker using the <code>metadata</code> parameter. Finally, we write the visualization to an HTML file.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; html = st.produce_scattertext_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... width_in_pixels=1000, ... 
metadata=convention_df['speaker']) &gt;&gt;&gt; open(&quot;Convention-Visualization.html&quot;, 'wb').write(html.encode('utf-8'))"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; html = st.produce_scattertext_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... width_in_pixels=1000, ... metadata=convention_df['speaker']) &gt;&gt;&gt; open("Convention-Visualization.html", 'wb').write(html.encode('utf-8')) </code></pre></div> <p dir="auto">Below is what the webpage looks like. Click it and wait a few minutes for the interactive version. <a href="https://jasonkessler.github.io/Conventions-Visualization.html" rel="nofollow"><img src="https://camo.githubusercontent.com/7db752f8d90001ebcd8f684a89333f2651ff3c3cd891961e0afd4d9abb955fd1/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f32303132636f6e76656e74696f6e73302e302e322e322e706e67" alt="Conventions-Visualization.html" data-canonical-src="https://jasonkessler.github.io/2012conventions0.0.2.2.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing Phrase associations</h3><a id="user-content-visualizing-phrase-associations" class="anchor" aria-label="Permalink: Visualizing Phrase associations" href="#visualizing-phrase-associations"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 
1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Scattertext can also be used to visualize the category association of a variety of different phrase types. The word "phrase" denotes any single or multi-word collocation.</p> <div class="markdown-heading" dir="auto"><h4 tabindex="-1" class="heading-element" dir="auto">Using PyTextRank</h4><a id="user-content-using-pytextrank" class="anchor" aria-label="Permalink: Using PyTextRank" href="#using-pytextrank"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto"><a href="https://github.com/DerwenAI/pytextrank">PyTextRank</a>, created by Paco Nathan, is an implementation of a modified version of the TextRank algorithm (Mihalcea and Tarau 2004). It uses a graph centrality algorithm to extract a scored list of the most prominent phrases in a document. Here, the phrases are named entities recognized by spaCy. As of spaCy version 2.2, these are from an NER system trained on <a href="https://catalog.ldc.upenn.edu/LDC2013T19" rel="nofollow">Ontonotes 5</a>.</p> <p dir="auto">Please install pytextrank <code>$ pip3 install pytextrank</code> before continuing with this tutorial.</p> <p dir="auto">To use, build a corpus as normal, but make sure you use spaCy to parse each document as opposed to a built-in <code>whitespace_nlp</code>-type tokenizer. 
Note that adding PyTextRank to the spaCy pipeline is not needed, as it will be run separately by the <code>PyTextRankPhrases</code> object. We'll reduce the number of phrases displayed in the chart to 2000 using the <code>AssociationCompactor</code>. The phrases generated will be treated like non-textual features since their document scores will not correspond to word counts.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="import pytextrank, spacy import scattertext as st nlp = spacy.load('en') nlp.add_pipe(&quot;textrank&quot;, last=True) convention_df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(nlp), party=lambda df: df.party.apply({'democrat': 'Democratic', 'republican': 'Republican'}.get) ) corpus = st.CorpusFromParsedDocuments( convention_df, category_col='party', parsed_col='parse', feats_from_spacy_doc=st.PyTextRankPhrases() ).build( ).compact( AssociationCompactor(2000, use_non_text_features=True) )"><pre lang="pydocstring" class="notranslate"><code>import pytextrank, spacy import scattertext as st nlp = spacy.load('en') nlp.add_pipe("textrank", last=True) convention_df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(nlp), party=lambda df: df.party.apply({'democrat': 'Democratic', 'republican': 'Republican'}.get) ) corpus = st.CorpusFromParsedDocuments( convention_df, category_col='party', parsed_col='parse', feats_from_spacy_doc=st.PyTextRankPhrases() ).build( ).compact( AssociationCompactor(2000, use_non_text_features=True) ) </code></pre></div> <p dir="auto">Note that the terms present in the corpus are named entities, and, as opposed to frequency counts, their scores are the eigencentrality scores assigned to them by the TextRank algorithm. Running <code>corpus.get_metadata_freq_df('')</code> will return, for each category, the sums of terms' TextRank scores. 
The dense ranks of these scores will be used to construct the scatter plot.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="term_category_scores = corpus.get_metadata_freq_df('') print(term_category_scores) ''' Democratic Republican term our future 1.113434 0.699103 your country 0.314057 0.000000 their home 0.385925 0.000000 our government 0.185483 0.462122 our workers 0.199704 0.210989 her family 0.540887 0.405552 our time 0.510930 0.410058 ... '''"><pre lang="pydocstring" class="notranslate"><code>term_category_scores = corpus.get_metadata_freq_df('') print(term_category_scores) ''' Democratic Republican term our future 1.113434 0.699103 your country 0.314057 0.000000 their home 0.385925 0.000000 our government 0.185483 0.462122 our workers 0.199704 0.210989 her family 0.540887 0.405552 our time 0.510930 0.410058 ... ''' </code></pre></div> <p dir="auto">Before we construct the plot, let's define some helper variables. Since the aggregate TextRank scores aren't particularly interpretable, we'll display the per-category rank of each score in the <code>metadata_description</code> field. 
These will be displayed after a term is clicked.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="term_ranks = pd.DataFrame( np.argsort(np.argsort(-term_category_scores, axis=0), axis=0) + 1, columns=term_category_scores.columns, index=term_category_scores.index) metadata_descriptions = { term: '&lt;br/&gt;' + '&lt;br/&gt;'.join( '&lt;b&gt;%s&lt;/b&gt; TextRank score rank: %s/%s' % (cat, term_ranks.loc[term, cat], corpus.get_num_metadata()) for cat in corpus.get_categories()) for term in corpus.get_metadata() }"><pre lang="pydocstring" class="notranslate"><code>term_ranks = pd.DataFrame( np.argsort(np.argsort(-term_category_scores, axis=0), axis=0) + 1, columns=term_category_scores.columns, index=term_category_scores.index) metadata_descriptions = { term: '&lt;br/&gt;' + '&lt;br/&gt;'.join( '&lt;b&gt;%s&lt;/b&gt; TextRank score rank: %s/%s' % (cat, term_ranks.loc[term, cat], corpus.get_num_metadata()) for cat in corpus.get_categories()) for term in corpus.get_metadata() } </code></pre></div> <p dir="auto">We can construct term scores in a couple ways. One is a standard dense-rank difference, a score which is used in most of the two-category contrastive plots here, which will give us the most category-associated phrases. Another is to use the maximum category-specific score, this will give us the most prominent phrases in each category, regardless of the prominence in the other category. 
We'll take both approaches in this tutorial; let's compute the second kind of score, the category-specific prominence, below.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="category_specific_prominence = term_category_scores.apply( lambda r: r.Democratic if r.Democratic &gt; r.Republican else -r.Republican, axis=1 )"><pre lang="pydocstring" class="notranslate"><code>category_specific_prominence = term_category_scores.apply( lambda r: r.Democratic if r.Democratic &gt; r.Republican else -r.Republican, axis=1 ) </code></pre></div> <p dir="auto">Now we're ready to output this chart. Note that we use a <code>dense_rank</code> transform, which places identically scaled phrases atop each other. We use <code>category_specific_prominence</code> as scores, and set <code>sort_by_dist</code> as <code>False</code> to ensure the phrases displayed on the right-hand side of the chart are ranked by the scores and not distance to the upper-left or lower-right corners. Since matching phrases are treated as non-text features, we encode them as single-phrase topic models and set the <code>topic_model_preview_size</code> to <code>0</code> to indicate the topic model list shouldn't be shown. Finally, we ensure the full documents are displayed. 
Note the documents will be displayed in order of phrase-specific score.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="html = produce_scattertext_explorer( corpus, category='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, transform=dense_rank, metadata=corpus.get_df()['speaker'], scores=category_specific_prominence, sort_by_dist=False, use_non_text_features=True, topic_model_term_lists={term: [term] for term in corpus.get_metadata()}, topic_model_preview_size=0, metadata_descriptions=metadata_descriptions, use_full_doc=True )"><pre lang="pydocstring" class="notranslate"><code>html = produce_scattertext_explorer( corpus, category='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, transform=dense_rank, metadata=corpus.get_df()['speaker'], scores=category_specific_prominence, sort_by_dist=False, use_non_text_features=True, topic_model_term_lists={term: [term] for term in corpus.get_metadata()}, topic_model_preview_size=0, metadata_descriptions=metadata_descriptions, use_full_doc=True ) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/PyTextRankProminenceScore.html" rel="nofollow"><img src="https://camo.githubusercontent.com/cdc5b90c80123d0b082ac7dfd01ab14a888a8268dbcf18f7fe8c6b4d4e2056b4/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f50795465787452616e6b50726f6d696e656e63652e706e67" alt="PyTextRankProminenceScore.html" data-canonical-src="https://jasonkessler.github.io/PyTextRankProminence.png" style="max-width: 100%;"></a></p> <p dir="auto">The most associated terms in each category make some sense, at least on a post hoc analysis. When referring to (then) Governor Romney, Democrats used his surname "Romney" in their most central mentions of him, while Republicans used the more familiar and humanizing "Mitt". 
In terms of President Obama, the phrase "Obama" didn't show up as a top term in either, but the first name "Barack" was one of the most central phrases in Democratic speeches, mirroring "Mitt."</p> <p dir="auto">Alternatively, we can use the Dense Rank Difference in scores to color phrase-points and determine the top phrases to be displayed on the right-hand side of the chart. Instead of setting <code>scores</code> as category-specific prominence scores, we set <code>term_scorer=RankDifference()</code> to inject a way of determining term scores into the scatter plot creation process.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="html = produce_scattertext_explorer( corpus, category='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, transform=dense_rank, use_non_text_features=True, metadata=corpus.get_df()['speaker'], term_scorer=RankDifference(), sort_by_dist=False, topic_model_term_lists={term: [term] for term in corpus.get_metadata()}, topic_model_preview_size=0, metadata_descriptions=metadata_descriptions, use_full_doc=True )"><pre lang="pydocstring" class="notranslate"><code>html = produce_scattertext_explorer( corpus, category='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, transform=dense_rank, use_non_text_features=True, metadata=corpus.get_df()['speaker'], term_scorer=RankDifference(), sort_by_dist=False, topic_model_term_lists={term: [term] for term in corpus.get_metadata()}, topic_model_preview_size=0, metadata_descriptions=metadata_descriptions, use_full_doc=True ) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/PyTextRankRankDiff.html" rel="nofollow"><img 
src="https://camo.githubusercontent.com/de3e3d784bb952e829f6193edff19c9b5cc516c5c2193ff5554745da50e01541/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f50795465787452616e6b52616e6b446966662e706e67" alt="PyTextRankRankDiff.html" data-canonical-src="https://jasonkessler.github.io/PyTextRankRankDiff.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h4 tabindex="-1" class="heading-element" dir="auto">Using Phrasemachine to find phrases.</h4><a id="user-content-using-phrasemachine-to-find-phrases" class="anchor" aria-label="Permalink: Using Phrasemachine to find phrases." href="#using-phrasemachine-to-find-phrases"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Phrasemachine from <a href="https://github.com/AbeHandler">AbeHandler</a> (Handler et al. 2016) uses regular expressions over sequences of part-of-speech tags to identify noun phrases. 
This has the advantage over using spaCy's NP-chunking in that it tends to isolate meaningful, large noun phrases which are free of appositives.</p> <p dir="auto">As opposed to PyTextRank, we'll just use counts of these phrases, treating them like any other term.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="import spacy from scattertext import SampleCorpora, PhraseMachinePhrases, dense_rank, RankDifference, AssociationCompactor, produce_scattertext_explorer from scattertext.CorpusFromPandas import CorpusFromPandas corpus = (CorpusFromPandas(SampleCorpora.ConventionData2012.get_data(), category_col='party', text_col='text', feats_from_spacy_doc=PhraseMachinePhrases(), nlp=spacy.load('en', parser=False)) .build().compact(AssociationCompactor(4000))) html = produce_scattertext_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, transform=dense_rank, metadata=corpus.get_df()['speaker'], term_scorer=RankDifference(), width_in_pixels=1000)"><pre lang="pydocstring" class="notranslate"><code>import spacy from scattertext import SampleCorpora, PhraseMachinePhrases, dense_rank, RankDifference, AssociationCompactor, produce_scattertext_explorer from scattertext.CorpusFromPandas import CorpusFromPandas corpus = (CorpusFromPandas(SampleCorpora.ConventionData2012.get_data(), category_col='party', text_col='text', feats_from_spacy_doc=PhraseMachinePhrases(), nlp=spacy.load('en', parser=False)) .build().compact(AssociationCompactor(4000))) html = produce_scattertext_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, transform=dense_rank, metadata=corpus.get_df()['speaker'], term_scorer=RankDifference(), width_in_pixels=1000) </code></pre></div> <p dir="auto"><a 
href="https://jasonkessler.github.io/Phrasemachine.html" rel="nofollow"><img src="https://camo.githubusercontent.com/c04db4cd5b227ebf5d77dd556a4b3f99cd177852a9d865e18f7fd201bf9e670c/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f5068726173654d616368696e652e706e67" alt="Phrasemachine.html" data-canonical-src="https://jasonkessler.github.io/PhraseMachine.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Adding color gradients to explain scores</h3><a id="user-content-adding-color-gradients-to-explain-scores" class="anchor" aria-label="Permalink: Adding color gradients to explain scores" href="#adding-color-gradients-to-explain-scores"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">In Scattertext, various metrics, including term associations, are often shown through two ways. The first and most important, is the position in the chart. The second is the color of a point or text. 
In Scattertext 0.2.21, a way of visualizing the semantics of these scores is introduced: the gradient as key.</p> <p dir="auto">The gradient, by default, follows the <code>d3_color_scale</code> parameter of <code>produce_scattertext_explorer</code> which is <code>d3.interpolateRdYlBu</code> by default.</p> <p dir="auto">The following additional parameters to <code>produce_scattertext_explorer</code> (and similar functions) allow for the manipulation of gradients.</p> <ul dir="auto"> <li><code>include_gradient: bool</code> (<code>False</code> by default) is a flag that triggers the appearance of a gradient.</li> <li><code>left_gradient_term: Optional[str]</code> indicates the text written on the far-left side of the gradient. It is written in <code>gradient_text_color</code> and is <code>category_name</code> by default.</li> <li><code>right_gradient_term: Optional[str]</code> indicates the text written on the far-right side of the gradient. It is written in <code>gradient_text_color</code> and is <code>not_category_name</code> by default.</li> <li><code>middle_gradient_term: Optional[str]</code> indicates the text written in the middle of the gradient. It is the opposite color of the center gradient color and is empty by default.</li> <li><code>gradient_text_color: Optional[str]</code> indicates the fixed color of the text written on the gradient. If None, it defaults to opposite color of the gradient.</li> <li><code>left_text_color: Optional[str]</code> overrides <code>gradient_text_color</code> for the left gradient term</li> <li><code>middle_text_color: Optional[str]</code> overrides <code>gradient_text_color</code> for the middle gradient term</li> <li><code>right_text_color: Optional[str]</code> overrides <code>gradient_text_color</code> for the right gradient term</li> <li><code>gradient_colors: Optional[List[str]]</code> list of hex colors, including '#', (e.g., <code>['#0000ff', '#980067', '#cc3300', '#32cd00']</code>) which describe the gradient. 
If given, these override <code>d3_color_scale</code>.</li> </ul> <p dir="auto">A straightforward example is as follows. Term colors are defined as a mapping between a term name and a <code>#RRGGBB</code> color as part of the <code>term_color</code> parameter, and the color gradient is defined in <code>gradient_colors</code>. THe</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content=" import matplotlib.pyplot as plt import matplotlib as mpl df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ) corpus = st.CorpusFromParsedDocuments( df, category_col='party', parsed_col='parse' ).build().get_unigram_corpus().compact(st.AssociationCompactor(2000)) html = st.produce_scattertext_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, metadata=corpus.get_df()['speaker'], transform=st.Scalers.dense_rank, include_gradient=True, left_gradient_term=&quot;More Democratic&quot;, right_gradient_term=&quot;More Republican&quot;, middle_gradient_term='Metric: Dense Rank Difference', gradient_text_color=&quot;white&quot;, term_colors=dict(zip( corpus.get_terms(), [ mpl.colors.to_hex(x) for x in plt.get_cmap('brg')( st.Scalers.scale_center_zero_abs( st.RankDifferenceScorer(corpus).set_categories('democrat').get_scores()).values ) ] )), gradient_colors=[mpl.colors.to_hex(x) for x in plt.get_cmap('brg')(np.arange(1., 0., -0.01))], )"><pre><span class="pl-k">import</span> <span class="pl-s1">matplotlib</span>.<span class="pl-s1">pyplot</span> <span class="pl-k">as</span> <span class="pl-s1">plt</span> <span class="pl-k">import</span> <span class="pl-s1">matplotlib</span> <span class="pl-k">as</span> <span class="pl-s1">mpl</span> <span class="pl-s1">df</span> <span class="pl-c1">=</span> <span 
class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>().<span class="pl-c1">assign</span>( <span class="pl-s1">parse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) ) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>( <span class="pl-s1">df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span> ).<span class="pl-c1">build</span>().<span class="pl-c1">get_unigram_corpus</span>().<span class="pl-c1">compact</span>(<span class="pl-s1">st</span>.<span class="pl-c1">AssociationCompactor</span>(<span class="pl-c1">2000</span>)) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">minimum_term_frequency</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">pmi_threshold_coefficient</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span 
class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>], <span class="pl-s1">transform</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">dense_rank</span>, <span class="pl-s1">include_gradient</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">left_gradient_term</span><span class="pl-c1">=</span><span class="pl-s">"More Democratic"</span>, <span class="pl-s1">right_gradient_term</span><span class="pl-c1">=</span><span class="pl-s">"More Republican"</span>, <span class="pl-s1">middle_gradient_term</span><span class="pl-c1">=</span><span class="pl-s">'Metric: Dense Rank Difference'</span>, <span class="pl-s1">gradient_text_color</span><span class="pl-c1">=</span><span class="pl-s">"white"</span>, <span class="pl-s1">term_colors</span><span class="pl-c1">=</span><span class="pl-en">dict</span>(<span class="pl-en">zip</span>( <span class="pl-s1">corpus</span>.<span class="pl-c1">get_terms</span>(), [ <span class="pl-s1">mpl</span>.<span class="pl-c1">colors</span>.<span class="pl-c1">to_hex</span>(<span class="pl-s1">x</span>) <span class="pl-k">for</span> <span class="pl-s1">x</span> <span class="pl-c1">in</span> <span class="pl-s1">plt</span>.<span class="pl-c1">get_cmap</span>(<span class="pl-s">'brg'</span>)( <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">scale_center_zero_abs</span>( <span class="pl-s1">st</span>.<span class="pl-c1">RankDifferenceScorer</span>(<span class="pl-s1">corpus</span>).<span class="pl-c1">set_categories</span>(<span class="pl-s">'democrat'</span>).<span class="pl-c1">get_scores</span>()).<span class="pl-c1">values</span> ) ] )), <span class="pl-s1">gradient_colors</span><span class="pl-c1">=</span>[<span class="pl-s1">mpl</span>.<span class="pl-c1">colors</span>.<span class="pl-c1">to_hex</span>(<span class="pl-s1">x</span>) <span class="pl-k">for</span> 
<span class="pl-s1">x</span> <span class="pl-c1">in</span> <span class="pl-s1">plt</span>.<span class="pl-c1">get_cmap</span>(<span class="pl-s">'brg'</span>)(<span class="pl-s1">np</span>.<span class="pl-c1">arange</span>(<span class="pl-c1">1.</span>, <span class="pl-c1">0.</span>, <span class="pl-c1">-</span><span class="pl-c1">0.01</span>))], )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_gradient.html" rel="nofollow"><img src="https://camo.githubusercontent.com/cccbc78f7e640953b8630142686836e34945ea78c6b5575c4180613191989b70/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f6772616469656e742e706e67" alt="demo_gradient.html" data-canonical-src="https://jasonkessler.github.io/gradient.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing Empath topics and categories</h3><a id="user-content-visualizing-empath-topics-and-categories" class="anchor" aria-label="Permalink: Visualizing Empath topics and categories" href="#visualizing-empath-topics-and-categories"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">In order to visualize Empath (Fast et al., 2016) topics and categories instead of terms, we'll need to create a <code>Corpus</code> of extracted topics and categories rather than unigrams and bigrams. 
To do so, use the <code>FeatsFromOnlyEmpath</code> feature extractor. See the source code for examples of how to make your own.</p> <p dir="auto">When creating the visualization, pass the <code>use_non_text_features=True</code> argument into <code>produce_scattertext_explorer</code>. This will instruct it to use the labeled Empath topics and categories instead of looking for terms. Since the documents returned when a topic or category label is clicked will be in order of the document-level category-association strength, setting <code>use_full_doc=True</code> makes sense, unless you have enormous documents. Otherwise, the first 300 characters will be shown.</p> <p dir="auto">(New in 0.0.26). Ensure you include <code>topic_model_term_lists=feat_builder.get_top_model_term_lists()</code> in <code>produce_scattertext_explorer</code> so that it bolds passages of snippets that match the topic model.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; feat_builder = st.FeatsFromOnlyEmpath() &gt;&gt;&gt; empath_corpus = st.CorpusFromParsedDocuments(convention_df, ... category_col='party', ... feats_from_spacy_doc=feat_builder, ... parsed_col='text').build() &gt;&gt;&gt; html = st.produce_scattertext_explorer(empath_corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... width_in_pixels=1000, ... metadata=convention_df['speaker'], ... use_non_text_features=True, ... use_full_doc=True, ... topic_model_term_lists=feat_builder.get_top_model_term_lists()) &gt;&gt;&gt; open(&quot;Convention-Visualization-Empath.html&quot;, 'wb').write(html.encode('utf-8'))"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; feat_builder = st.FeatsFromOnlyEmpath() &gt;&gt;&gt; empath_corpus = st.CorpusFromParsedDocuments(convention_df, ... category_col='party', ... feats_from_spacy_doc=feat_builder, ... 
parsed_col='text').build() &gt;&gt;&gt; html = st.produce_scattertext_explorer(empath_corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... width_in_pixels=1000, ... metadata=convention_df['speaker'], ... use_non_text_features=True, ... use_full_doc=True, ... topic_model_term_lists=feat_builder.get_top_model_term_lists()) &gt;&gt;&gt; open("Convention-Visualization-Empath.html", 'wb').write(html.encode('utf-8')) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/Convention-Visualization-Empath.html" rel="nofollow"><img src="https://camo.githubusercontent.com/1a7b7b0250f763a20fed2a0fda35e9c20f2b1ffc01eec2b1f228ab29058e29cd/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e2d56697375616c697a6174696f6e2d456d706174682e706e67" alt="Convention-Visualization-Empath.html" data-canonical-src="https://jasonkessler.github.io/Convention-Visualization-Empath.png" style="max-width: 100%;"></a></p> <p dir="auto">Scattertext also includes a feature builder to explore the relationship between General Inquirer Tag Categories and Document Categories. We'll use a slightly different approach, looking at the relationship of GI Tag Categories to political parties by using the Z-Scores of the Log-Odds-Ratio with Uninformative Dirichlet Priors (Monroe 2008). 
We'll use the <code>produce_frequency_explorer</code> plot variation to visualize this relationship, setting the x-axis as the number of times a word in the tag category occurs, and the y-axis as the z-score.</p> <p dir="auto">For more information on the General Inquirer, please see the <a href="http://www.wjh.harvard.edu/~inquirer/" rel="nofollow">General Inquirer Home Page</a>.</p> <p dir="auto">We'll use the same data set as before, except we'll use the <code>FeatsFromGeneralInquirer</code> feature builder.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; general_inquirer_feature_builder = st.FeatsFromGeneralInquirer() &gt;&gt;&gt; corpus = st.CorpusFromPandas(convention_df, ... category_col='party', ... text_col='text', ... nlp=st.whitespace_nlp_with_sentences, ... feats_from_spacy_doc=general_inquirer_feature_builder).build()"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; general_inquirer_feature_builder = st.FeatsFromGeneralInquirer() &gt;&gt;&gt; corpus = st.CorpusFromPandas(convention_df, ... category_col='party', ... text_col='text', ... nlp=st.whitespace_nlp_with_sentences, ... feats_from_spacy_doc=general_inquirer_feature_builder).build() </code></pre></div> <p dir="auto">Next, we'll call <code>produce_frequency_explorer</code> in a similar way we called <code>produce_scattertext_explorer</code> in the previous section. There are a few differences, however. First, we specify the <code>LogOddsRatioUninformativeDirichletPrior</code> term scorer, which scores the relationships between the categories. The <code>grey_threshold</code> indicates the points scoring between [-1.96, 1.96] (i.e., p &gt; 0.05) should be colored gray. The argument <code>metadata_descriptions=general_inquirer_feature_builder.get_definitions()</code> indicates that a dictionary mapping the tag name to a string definition is passed. 
When a tag is clicked, the definition in the dictionary will be shown below the plot, as shown in the image following the snippet.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; html = st.produce_frequency_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... metadata=convention_df['speaker'], ... use_non_text_features=True, ... use_full_doc=True, ... term_scorer=st.LogOddsRatioUninformativeDirichletPrior(), ... grey_threshold=1.96, ... width_in_pixels=1000, ... topic_model_term_lists=general_inquirer_feature_builder.get_top_model_term_lists(), ... metadata_descriptions=general_inquirer_feature_builder.get_definitions())"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; html = st.produce_frequency_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... metadata=convention_df['speaker'], ... use_non_text_features=True, ... use_full_doc=True, ... term_scorer=st.LogOddsRatioUninformativeDirichletPrior(), ... grey_threshold=1.96, ... width_in_pixels=1000, ... topic_model_term_lists=general_inquirer_feature_builder.get_top_model_term_lists(), ... 
metadata_descriptions=general_inquirer_feature_builder.get_definitions()) </code></pre></div> <p dir="auto">Here's the resulting chart.<br> <a href="https://jasonkessler.github.io/demo_general_inquirer_frequency_plot.html" rel="nofollow"><img src="https://camo.githubusercontent.com/1bb4c4ae64b1b539244f3936a8960293398eb7b0a106c81dda8c5db745ec072c/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f67656e6572616c5f696e7175697265722e706e67" alt="demo_general_inquirer_frequency_plot.html" data-canonical-src="https://jasonkessler.github.io/general_inquirer.png" style="max-width: 100%;"></a></p> <p dir="auto"><a href="https://jasonkessler.github.io/demo_general_inquirer_frequency_plot.html" rel="nofollow"><img src="https://camo.githubusercontent.com/d32a10b53f6e165909ada541eb99ba108a9de67d647a5544e2ea2357dcd3479f/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f67656e6572616c5f696e717569726572322e706e67" alt="demo_general_inquirer_frequency_plot.html" data-canonical-src="https://jasonkessler.github.io/general_inquirer2.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing the Moral Foundations 2.0 Dictionary</h3><a id="user-content-visualizing-the-moral-foundations-20-dictionary" class="anchor" aria-label="Permalink: Visualizing the Moral Foundations 2.0 Dictionary" href="#visualizing-the-moral-foundations-20-dictionary"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 
1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The <a href="https://moralfoundations.org/" rel="nofollow">[Moral Foundations Theory]</a> proposes six psychological constructs as building blocks of moral thinking, as described in Graham et al. (2013). These foundations are, as described on <a href="https://moralfoundations.org/" rel="nofollow">[moralfoundations.org]</a>: care/harm, fairness/cheating, loyalty/betrayal, authority/subversion, sanctity/degradation, and liberty/oppression. Please see the site for a more in-depth discussion of these foundations.</p> <p dir="auto">Frimer et al. (2019) created the Moral Foundations Dictionary 2.0, or a lexicon of terms which invoke a moral foundation as a virtue (favorable toward the foundation) or a vice (in opposition to the foundation).</p> <p dir="auto">This dictionary can be used in the same way as the General Inquirer. In this example, we can plot the Cohen's d scores of foundation-word counts relative to the frequencies words involving those foundations were invoked.</p> <p dir="auto">We can first load the corpus as normal, and use <code>st.FeatsFromMoralFoundationsDictionary()</code> to extract features.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import scattertext as st convention_df = st.SampleCorpora.ConventionData2012.get_data() moral_foundations_feats = st.FeatsFromMoralFoundationsDictionary() corpus = st.CorpusFromPandas(convention_df, category_col='party', text_col='text', nlp=st.whitespace_nlp_with_sentences, feats_from_spacy_doc=moral_foundations_feats).build()"><pre><span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span 
class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">moral_foundations_feats</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">FeatsFromMoralFoundationsDictionary</span>() <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromPandas</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">text_col</span><span class="pl-c1">=</span><span class="pl-s">'text'</span>, <span class="pl-s1">nlp</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>, <span class="pl-s1">feats_from_spacy_doc</span><span class="pl-c1">=</span><span class="pl-s1">moral_foundations_feats</span>).<span class="pl-c1">build</span>()</pre></div> <p dir="auto">Next, let's use Cohen's d term scorer to analyze the corpus, and describe a set of Cohen's d association scores.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="cohens_d_scorer = st.CohensD(corpus).use_metadata() term_scorer = cohens_d_scorer.set_categories('democrat', ['republican']).term_scorer.get_score_df()"><pre><span class="pl-s1">cohens_d_scorer</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CohensD</span>(<span class="pl-s1">corpus</span>).<span class="pl-c1">use_metadata</span>() <span class="pl-s1">term_scorer</span> <span class="pl-c1">=</span> <span class="pl-s1">cohens_d_scorer</span>.<span class="pl-c1">set_categories</span>(<span class="pl-s">'democrat'</span>, [<span class="pl-s">'republican'</span>]).<span class="pl-c1">term_scorer</span>.<span class="pl-c1">get_score_df</span>()</pre></div> <p dir="auto">Which yields the 
following data frame:</p> <markdown-accessiblity-table><table> <thead> <tr> <th align="left"></th> <th align="right">cohens_d</th> <th align="right">cohens_d_se</th> <th align="right">cohens_d_z</th> <th align="right">cohens_d_p</th> <th align="right">hedges_g</th> <th align="right">hedges_g_se</th> <th align="right">hedges_g_z</th> <th align="right">hedges_g_p</th> <th align="right">m1</th> <th align="right">m2</th> <th align="right">count1</th> <th align="right">count2</th> <th align="right">docs1</th> <th align="right">docs2</th> </tr> </thead> <tbody> <tr> <td align="left">care.virtue</td> <td align="right">0.662891</td> <td align="right">0.149425</td> <td align="right">4.43629</td> <td align="right">4.57621e-06</td> <td align="right">0.660257</td> <td align="right">0.159049</td> <td align="right">4.15129</td> <td align="right">1.65302e-05</td> <td align="right">0.195049</td> <td align="right">0.12164</td> <td align="right">760</td> <td align="right">379</td> <td align="right">115</td> <td align="right">54</td> </tr> <tr> <td align="left">care.vice</td> <td align="right">0.24435</td> <td align="right">0.146025</td> <td align="right">1.67335</td> <td align="right">0.0471292</td> <td align="right">0.243379</td> <td align="right">0.152654</td> <td align="right">1.59432</td> <td align="right">0.0554325</td> <td align="right">0.0580005</td> <td align="right">0.0428358</td> <td align="right">244</td> <td align="right">121</td> <td align="right">80</td> <td align="right">41</td> </tr> <tr> <td align="left">fairness.virtue</td> <td align="right">0.176794</td> <td align="right">0.145767</td> <td align="right">1.21286</td> <td align="right">0.112592</td> <td align="right">0.176092</td> <td align="right">0.152164</td> <td align="right">1.15725</td> <td align="right">0.123586</td> <td align="right">0.0502469</td> <td align="right">0.0403369</td> <td align="right">225</td> <td align="right">107</td> <td align="right">71</td> <td align="right">39</td> </tr> <tr> <td 
align="left">fairness.vice</td> <td align="right">0.0707162</td> <td align="right">0.145528</td> <td align="right">0.485928</td> <td align="right">0.313509</td> <td align="right">0.0704352</td> <td align="right">0.151711</td> <td align="right">0.464273</td> <td align="right">0.321226</td> <td align="right">0.00718627</td> <td align="right">0.00573227</td> <td align="right">32</td> <td align="right">14</td> <td align="right">21</td> <td align="right">10</td> </tr> <tr> <td align="left">authority.virtue</td> <td align="right">-0.0187793</td> <td align="right">0.145486</td> <td align="right">-0.12908</td> <td align="right">0.551353</td> <td align="right">-0.0187047</td> <td align="right">0.15163</td> <td align="right">-0.123357</td> <td align="right">0.549088</td> <td align="right">0.358192</td> <td align="right">0.361191</td> <td align="right">1281</td> <td align="right">788</td> <td align="right">122</td> <td align="right">66</td> </tr> <tr> <td align="left">authority.vice</td> <td align="right">-0.0354164</td> <td align="right">0.145494</td> <td align="right">-0.243422</td> <td align="right">0.596161</td> <td align="right">-0.0352757</td> <td align="right">0.151646</td> <td align="right">-0.232619</td> <td align="right">0.591971</td> <td align="right">0.00353465</td> <td align="right">0.00390602</td> <td align="right">20</td> <td align="right">14</td> <td align="right">14</td> <td align="right">10</td> </tr> <tr> <td align="left">sanctity.virtue</td> <td align="right">-0.512145</td> <td align="right">0.147848</td> <td align="right">-3.46399</td> <td align="right">0.999734</td> <td align="right">-0.51011</td> <td align="right">0.156098</td> <td align="right">-3.26788</td> <td align="right">0.999458</td> <td align="right">0.0587987</td> <td align="right">0.101677</td> <td align="right">265</td> <td align="right">309</td> <td align="right">74</td> <td align="right">48</td> </tr> <tr> <td align="left">sanctity.vice</td> <td align="right">-0.108011</td> <td 
align="right">0.145589</td> <td align="right">-0.74189</td> <td align="right">0.770923</td> <td align="right">-0.107582</td> <td align="right">0.151826</td> <td align="right">-0.708585</td> <td align="right">0.760709</td> <td align="right">0.00845048</td> <td align="right">0.0109339</td> <td align="right">35</td> <td align="right">28</td> <td align="right">23</td> <td align="right">20</td> </tr> <tr> <td align="left">loyalty.virtue</td> <td align="right">-0.413696</td> <td align="right">0.147031</td> <td align="right">-2.81367</td> <td align="right">0.997551</td> <td align="right">-0.412052</td> <td align="right">0.154558</td> <td align="right">-2.666</td> <td align="right">0.996162</td> <td align="right">0.259296</td> <td align="right">0.309776</td> <td align="right">1056</td> <td align="right">717</td> <td align="right">119</td> <td align="right">66</td> </tr> <tr> <td align="left">loyalty.vice</td> <td align="right">-0.0854683</td> <td align="right">0.145549</td> <td align="right">-0.587213</td> <td align="right">0.72147</td> <td align="right">-0.0851287</td> <td align="right">0.151751</td> <td align="right">-0.560978</td> <td align="right">0.712594</td> <td align="right">0.00124518</td> <td align="right">0.00197022</td> <td align="right">5</td> <td align="right">5</td> <td align="right">5</td> <td align="right">4</td> </tr> </tbody> </table></markdown-accessiblity-table> <p dir="auto">This data frame gives us Cohen's d scores (and their standard errors and z-scores), Hedges' <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$g$</math-renderer> scores (ditto), the mean document-length normalized topic usage per category (where the in-focus category is m1 [in this case Democrats] and the out-of-focus is m2), the raw number of words used for each topic (count1 and count2), and the number of documents in each category with the topic (docs1 
and docs2).</p> <p dir="auto">Note that Cohen's d is the difference of m1 and m2 divided by their pooled standard deviation.</p> <p dir="auto">Now, let's plot the d-scores of foundations vs. their frequencies.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_frequency_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', metadata=convention_df['speaker'], use_non_text_features=True, use_full_doc=True, term_scorer=st.CohensD(corpus).use_metadata(), grey_threshold=0, width_in_pixels=1000, topic_model_term_lists=moral_foundations_feats.get_top_model_term_lists(), metadata_descriptions=moral_foundations_feats.get_definitions() )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_frequency_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>[<span class="pl-s">'speaker'</span>], <span class="pl-s1">use_non_text_features</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">use_full_doc</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">term_scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">CohensD</span>(<span class="pl-s1">corpus</span>).<span class="pl-c1">use_metadata</span>(), <span class="pl-s1">grey_threshold</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span 
class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">topic_model_term_lists</span><span class="pl-c1">=</span><span class="pl-s1">moral_foundations_feats</span>.<span class="pl-c1">get_top_model_term_lists</span>(), <span class="pl-s1">metadata_descriptions</span><span class="pl-c1">=</span><span class="pl-s1">moral_foundations_feats</span>.<span class="pl-c1">get_definitions</span>() )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_moral_foundations.html" rel="nofollow"><img src="https://camo.githubusercontent.com/3f55bb37c0bfc87a805dae0d5dfe633ce0a9b8031397966ec39cc341b3444452/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f6d6f72616c5f666f756e646174696f6e732e706e67" alt="demo_moral_foundations.html" data-canonical-src="https://jasonkessler.github.io/demo_moral_foundations.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Ordering Terms by Corpus Characteristicness</h3><a id="user-content-ordering-terms-by-corpus-characteristicness" class="anchor" aria-label="Permalink: Ordering Terms by Corpus Characteristicness" href="#ordering-terms-by-corpus-characteristicness"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Often the terms of most interest are 
ones that are characteristic to the corpus as a whole. These are terms which occur frequently in all sets of documents being studied, but relatively infrequent compared to general term frequencies.</p> <p dir="auto">We can produce a plot with a characteristic score on the x-axis and class-association scores on the y-axis using the function <code>produce_characteristic_explorer</code>.</p> <p dir="auto">Corpus characteristicness is the difference in dense term ranks between the words in all of the documents in the study and a general English-language frequency list. See this <a href="http://nbviewer.jupyter.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Class-Association-Scores.ipynb" rel="nofollow">Talk on Term-Class Association Scores</a> for a more thorough explanation.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import scattertext as st corpus = (st.CorpusFromPandas(st.SampleCorpora.ConventionData2012.get_data(), category_col='party', text_col='text', nlp=st.whitespace_nlp_with_sentences) .build() .get_unigram_corpus() .compact(st.ClassPercentageCompactor(term_count=2, term_ranker=st.OncePerDocFrequencyRanker))) html = st.produce_characteristic_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', metadata=corpus.get_df()['speaker'] ) open('demo_characteristic_chart.html', 'wb').write(html.encode('utf-8'))"><pre><span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromPandas</span>(<span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>(), <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span 
class="pl-s">'party'</span>, <span class="pl-s1">text_col</span><span class="pl-c1">=</span><span class="pl-s">'text'</span>, <span class="pl-s1">nlp</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) .<span class="pl-c1">build</span>() .<span class="pl-c1">get_unigram_corpus</span>() .<span class="pl-c1">compact</span>(<span class="pl-s1">st</span>.<span class="pl-c1">ClassPercentageCompactor</span>(<span class="pl-s1">term_count</span><span class="pl-c1">=</span><span class="pl-c1">2</span>, <span class="pl-s1">term_ranker</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">OncePerDocFrequencyRanker</span>))) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_characteristic_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>] ) <span class="pl-en">open</span>(<span class="pl-s">'demo_characteristic_chart.html'</span>, <span class="pl-s">'wb'</span>).<span class="pl-c1">write</span>(<span class="pl-s1">html</span>.<span class="pl-c1">encode</span>(<span class="pl-s">'utf-8'</span>))</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_characteristic_chart.html" rel="nofollow"><img 
src="https://camo.githubusercontent.com/d1636497bd0cbfa8b1845ebfc39c222b6d3de2739cce51f3dce23d434521bcab/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f63686172616374657269737469635f63686172742e706e67" alt="demo_characteristic_chart.html" data-canonical-src="https://jasonkessler.github.io/demo_characteristic_chart.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Document-Based Scatterplots</h3><a id="user-content-document-based-scatterplots" class="anchor" aria-label="Permalink: Document-Based Scatterplots" href="#document-based-scatterplots"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">In addition to words, phrases and topics, we can make each point correspond to a document. Let's first create a corpus object for the 2012 Conventions data set. 
This explanation follows <code>demo_pca_documents.py</code></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import pandas as pd from sklearn.feature_extraction.text import TfidfTransformer import scattertext as st from scipy.sparse.linalg import svds convention_df = st.SampleCorpora.ConventionData2012.get_data() convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences) corpus = (st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse') .build() .get_stoplisted_unigram_corpus())"><pre><span class="pl-k">import</span> <span class="pl-s1">pandas</span> <span class="pl-k">as</span> <span class="pl-s1">pd</span> <span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">feature_extraction</span>.<span class="pl-s1">text</span> <span class="pl-k">import</span> <span class="pl-v">TfidfTransformer</span> <span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-k">from</span> <span class="pl-s1">scipy</span>.<span class="pl-s1">sparse</span>.<span class="pl-s1">linalg</span> <span class="pl-k">import</span> <span class="pl-s1">svds</span> <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">convention_df</span>[<span class="pl-s">'parse'</span>] <span class="pl-c1">=</span> <span class="pl-s1">convention_df</span>[<span class="pl-s">'text'</span>].<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>(<span 
class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>) .<span class="pl-c1">build</span>() .<span class="pl-c1">get_stoplisted_unigram_corpus</span>())</pre></div> <p dir="auto">Next, let's add the document names as meta data in the corpus object. The <code>add_doc_names_as_metadata</code> function takes an array of document names, and populates a new corpus' meta data with those names. If two documents have the same name, it appends a number (starting with 1) to the name.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="corpus = corpus.add_doc_names_as_metadata(corpus.get_df()['speaker'])"><pre><span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">add_doc_names_as_metadata</span>(<span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>])</pre></div> <p dir="auto">Next, we find tf.idf scores for the corpus' term-document matrix, run sparse SVD, and add them to a projection data frame, making the x and y-axes the first two singular values, and indexing it on the corpus' meta data, which corresponds to the document names.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="embeddings = TfidfTransformer().fit_transform(corpus.get_term_doc_mat()) u, s, vt = svds(embeddings, k=3, maxiter=20000, which='LM') projection = pd.DataFrame({'term': corpus.get_metadata(), 'x': u.T[0], 'y': u.T[1]}).set_index('term')"><pre><span class="pl-s1">embeddings</span> <span class="pl-c1">=</span> <span class="pl-en">TfidfTransformer</span>().<span class="pl-c1">fit_transform</span>(<span 
class="pl-s1">corpus</span>.<span class="pl-c1">get_term_doc_mat</span>()) <span class="pl-s1">u</span>, <span class="pl-s1">s</span>, <span class="pl-s1">vt</span> <span class="pl-c1">=</span> <span class="pl-en">svds</span>(<span class="pl-s1">embeddings</span>, <span class="pl-s1">k</span><span class="pl-c1">=</span><span class="pl-c1">3</span>, <span class="pl-s1">maxiter</span><span class="pl-c1">=</span><span class="pl-c1">20000</span>, <span class="pl-s1">which</span><span class="pl-c1">=</span><span class="pl-s">'LM'</span>) <span class="pl-s1">projection</span> <span class="pl-c1">=</span> <span class="pl-s1">pd</span>.<span class="pl-c1">DataFrame</span>({<span class="pl-s">'term'</span>: <span class="pl-s1">corpus</span>.<span class="pl-c1">get_metadata</span>(), <span class="pl-s">'x'</span>: <span class="pl-s1">u</span>.<span class="pl-c1">T</span>[<span class="pl-c1">0</span>], <span class="pl-s">'y'</span>: <span class="pl-s1">u</span>.<span class="pl-c1">T</span>[<span class="pl-c1">1</span>]}).<span class="pl-c1">set_index</span>(<span class="pl-s">'term'</span>)</pre></div> <p dir="auto">Finally, set scores as 1 for Democrats and 0 for Republicans, rendering Republican documents as red points and Democratic documents as blue. 
For more on the <code>produce_pca_explorer</code> function, see <a href="#using-svd-to-visualize-any-kind-of-word-embeddings">Using SVD to visualize any kind of word embeddings</a>.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="category = 'democrat' scores = (corpus.get_category_ids() == corpus.get_categories().index(category)).astype(int) html = st.produce_pca_explorer(corpus, category=category, category_name='Democratic', not_category_name='Republican', metadata=convention_df['speaker'], width_in_pixels=1000, show_axes=False, use_non_text_features=True, use_full_doc=True, projection=projection, scores=scores, show_top_terms=False)"><pre><span class="pl-s1">category</span> <span class="pl-c1">=</span> <span class="pl-s">'democrat'</span> <span class="pl-s1">scores</span> <span class="pl-c1">=</span> (<span class="pl-s1">corpus</span>.<span class="pl-c1">get_category_ids</span>() <span class="pl-c1">==</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_categories</span>().<span class="pl-c1">index</span>(<span class="pl-s1">category</span>)).<span class="pl-c1">astype</span>(<span class="pl-s1">int</span>) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_pca_explorer</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s1">category</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>[<span class="pl-s">'speaker'</span>], <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span 
class="pl-s1">show_axes</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">use_non_text_features</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">use_full_doc</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">projection</span><span class="pl-c1">=</span><span class="pl-s1">projection</span>, <span class="pl-s1">scores</span><span class="pl-c1">=</span><span class="pl-s1">scores</span>, <span class="pl-s1">show_top_terms</span><span class="pl-c1">=</span><span class="pl-c1">False</span>)</pre></div> <p dir="auto">Click for an interactive version <a href="https://jasonkessler.github.io/demo_pca_documents.html" rel="nofollow"><img src="https://camo.githubusercontent.com/05e45f9f4a72357764039f8ffc92260448a79f5050c9b6f8f1cab538d9108c0f/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f646f635f7063612e706e67" alt="demo_pca_documents.html" data-canonical-src="https://jasonkessler.github.io/doc_pca.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using Cohen's d or Hedge's g to visualize effect size.</h3><a id="user-content-using-cohens-d-or-hedges-g-to-visualize-effect-size" class="anchor" aria-label="Permalink: Using Cohen's d or Hedge's g to visualize effect size." 
href="#using-cohens-d-or-hedges-g-to-visualize-effect-size"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Cohen's d is a popular metric used to measure effect size. The definitions of Cohen's d and Hedge's <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$g$</math-renderer> from (Shinichi and Cuthill 2017) are implemented in Scattertext.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() &gt;&gt;&gt; corpus = (st.CorpusFromPandas(convention_df, ... 
category_col='party', ...text_col='text', ...nlp=st.whitespace_nlp_with_sentences) ....build() ....get_unigram_corpus())"><pre><span class="pl-c1">&gt;</span><span class="pl-c1">&gt;&gt;</span> <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-c1">&gt;</span><span class="pl-c1">&gt;&gt;</span> <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromPandas</span>(<span class="pl-s1">convention_df</span>, ... <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, ...<span class="pl-s1">text_col</span><span class="pl-c1">=</span><span class="pl-s">'text'</span>, ...<span class="pl-s1">nlp</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) ....<span class="pl-c1">build</span>() ....<span class="pl-c1">get_unigram_corpus</span>())</pre></div> <p dir="auto">We can create a term scorer object to examine the effect sizes and other metrics.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="&gt;&gt; &gt; term_scorer = st.CohensD(corpus).set_categories('democrat', ['republican']) &gt;&gt; &gt; term_scorer.get_score_df().sort_values(by='cohens_d', ascending=False).head() cohens_d cohens_d_se cohens_d_z cohens_d_p hedges_g hedges_g_se hedges_g_z hedges_g_p m1 m2 obama 1.187378 0.024588 48.290444 0.000000e+00 1.187322 0.018419 64.461363 0.0 0.007778 0.002795 class 0.855859 0.020848 41.052045 0.000000e+00 0.855818 0.017227 49.677688 0.0 0.002222 0.000375 middle 0.826895 0.020553 40.232746 0.000000e+00 0.826857 0.017138 48.245626 0.0 0.002316 0.000400 president 0.820825 0.020492 40.056541 0.000000e+00 0.820786 0.017120 
47.942661 0.0 0.010231 0.005369 barack 0.730624 0.019616 37.245725 6.213052e-304 0.730589 0.016862 43.327800 0.0 0.002547 0.000725"><pre><span class="pl-c1">&gt;&gt;</span> <span class="pl-c1">&gt;</span> <span class="pl-s1">term_scorer</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CohensD</span>(<span class="pl-s1">corpus</span>).<span class="pl-c1">set_categories</span>(<span class="pl-s">'democrat'</span>, [<span class="pl-s">'republican'</span>])<span class="pl-s1"></span> <span class="pl-c1">&gt;&gt;</span> <span class="pl-c1">&gt;</span> <span class="pl-s1">term_scorer</span>.<span class="pl-c1">get_score_df</span>().<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'cohens_d'</span>, <span class="pl-s1">ascending</span><span class="pl-c1">=</span><span class="pl-c1">False</span>).<span class="pl-c1">head</span>() <span class="pl-s1">cohens_d</span> <span class="pl-s1">cohens_d_se</span> <span class="pl-s1">cohens_d_z</span> <span class="pl-s1">cohens_d_p</span> <span class="pl-s1">hedges_g</span> <span class="pl-s1">hedges_g_se</span> <span class="pl-s1">hedges_g_z</span> <span class="pl-s1">hedges_g_p</span> <span class="pl-s1">m1</span> <span class="pl-s1">m2</span> <span class="pl-s1">obama</span> <span class="pl-c1">1.187378</span> <span class="pl-c1">0.024588</span> <span class="pl-c1">48.290444</span> <span class="pl-c1">0.000000e+00</span> <span class="pl-c1">1.187322</span> <span class="pl-c1">0.018419</span> <span class="pl-c1">64.461363</span> <span class="pl-c1">0.0</span> <span class="pl-c1">0.007778</span> <span class="pl-c1">0.002795</span> <span class="pl-k">class</span> <span class="pl-c1">0.855859</span> <span class="pl-c1">0.020848</span> <span class="pl-c1">41.052045</span> <span class="pl-c1">0.000000e+00</span> <span class="pl-c1">0.855818</span> <span class="pl-c1">0.017227</span> <span class="pl-c1">49.677688</span> <span 
class="pl-c1">0.0</span> <span class="pl-c1">0.002222</span> <span class="pl-c1">0.000375</span> <span class="pl-s1">middle</span> <span class="pl-c1">0.826895</span> <span class="pl-c1">0.020553</span> <span class="pl-c1">40.232746</span> <span class="pl-c1">0.000000e+00</span> <span class="pl-c1">0.826857</span> <span class="pl-c1">0.017138</span> <span class="pl-c1">48.245626</span> <span class="pl-c1">0.0</span> <span class="pl-c1">0.002316</span> <span class="pl-c1">0.000400</span> <span class="pl-s1">president</span> <span class="pl-c1">0.820825</span> <span class="pl-c1">0.020492</span> <span class="pl-c1">40.056541</span> <span class="pl-c1">0.000000e+00</span> <span class="pl-c1">0.820786</span> <span class="pl-c1">0.017120</span> <span class="pl-c1">47.942661</span> <span class="pl-c1">0.0</span> <span class="pl-c1">0.010231</span> <span class="pl-c1">0.005369</span> <span class="pl-s1">barack</span> <span class="pl-c1">0.730624</span> <span class="pl-c1">0.019616</span> <span class="pl-c1">37.245725</span> <span class="pl-c1">6.213052e-304</span> <span class="pl-c1">0.730589</span> <span class="pl-c1">0.016862</span> <span class="pl-c1">43.327800</span> <span class="pl-c1">0.0</span> <span class="pl-c1">0.002547</span> <span class="pl-c1">0.000725</span></pre></div> <p dir="auto">Our calculation of Cohen's d is not directly based on term counts. Rather, we divide each document's term counts by the total number of terms in the document before calculating the statistics. <code>m1</code> and <code>m2</code> are, respectively the mean portions of words in speeches made by Democrats and Republicans that were the term in question. The effect size (<code>cohens_d</code>) is the difference between these means divided by the pooled standard deviation. <code>cohens_d_se</code> is the standard error of the statistic, while <code>cohens_d_z</code> and <code>cohens_d_p</code> are the Z-scores and p-values indicating the statistical significance of the effect. 
Corresponding columns are present for Hedge's <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$g$</math-renderer>, a version of Cohen's d adjusted for data set size.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; st.produce_frequency_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', term_scorer=st.CohensD(corpus), metadata=convention_df['speaker'], grey_threshold=0 )"><pre><span class="pl-c1">&gt;</span><span class="pl-c1">&gt;&gt;</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_frequency_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">term_scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">CohensD</span>(<span class="pl-s1">corpus</span>), <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>[<span class="pl-s">'speaker'</span>], <span class="pl-s1">grey_threshold</span><span class="pl-c1">=</span><span class="pl-c1">0</span> )</pre></div> <p dir="auto">Click for an interactive version. 
<a href="https://jasonkessler.github.io/demo_cohens_d.html" rel="nofollow"><img src="https://camo.githubusercontent.com/5634036c343602f97e0cab4c656df9800f742efd05420fcb0352b86c28b3d773/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f636f68656e5f642e706e67" alt="demo_cohens_d.html" data-canonical-src="https://jasonkessler.github.io/cohen_d.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using Cliff's Delta to visualize effect size</h3><a id="user-content-using-cliffs-delta-to-visualize-effect-size" class="anchor" aria-label="Permalink: Using Cliff's Delta to visualize effect size" href="#using-cliffs-delta-to-visualize-effect-size"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Cliff's Delta (Cliff 1993) uses a non-parametric approach to computing effect size. In our setting, the term's frequency percentage of each document in the focus set is compared with that of the background set. For each pair of documents, a score of 1 is given if the focus document's frequency percentage is larger than the background, 0 if identical, and -1 if smaller. 
Note that this assumes document lengths are similarly distributed across the focus and background corpora.</p> <p dir="auto">See [<a href="https://real-statistics.com/non-parametric-tests/mann-whitney-test/cliffs-delta/" rel="nofollow">https://real-statistics.com/non-parametric-tests/mann-whitney-test/cliffs-delta/</a>] for the formulas used in <code>CliffsDelta</code>.</p> <p dir="auto">Below is an example of how to use <code>CliffsDelta</code> to find and plot term scores:</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="nlp = spacy.blank('en') nlp.add_pipe('sentencizer') convention_df = st.SampleCorpora.ConventionData2012.get_data().assign( party = lambda df: df.party.apply( lambda x: {'democrat': 'Dem', 'republican': 'Rep'}[x]), SpacyParse=lambda df: df.text.progress_apply(nlp) ) corpus = st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='SpacyParse').build( ).remove_terms_used_in_less_than_num_docs(10) st.CliffsDelta(corpus).set_categories('Dem').get_score_df().sort_values(by='Dem', ascending=False).iloc[:10]"><pre><span class="pl-s1">nlp</span> <span class="pl-c1">=</span> <span class="pl-s1">spacy</span>.<span class="pl-c1">blank</span>(<span class="pl-s">'en'</span>) <span class="pl-s1">nlp</span>.<span class="pl-c1">add_pipe</span>(<span class="pl-s">'sentencizer'</span>) <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>().<span class="pl-c1">assign</span>( <span class="pl-s1">party</span> <span class="pl-c1">=</span> <span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">party</span>.<span class="pl-c1">apply</span>( <span class="pl-k">lambda</span> <span class="pl-s1">x</span>: {<span 
class="pl-s">'democrat'</span>: <span class="pl-s">'Dem'</span>, <span class="pl-s">'republican'</span>: <span class="pl-s">'Rep'</span>}[<span class="pl-s1">x</span>]), <span class="pl-v">SpacyParse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">progress_apply</span>(<span class="pl-s1">nlp</span>) ) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'SpacyParse'</span>).<span class="pl-c1">build</span>( ).<span class="pl-c1">remove_terms_used_in_less_than_num_docs</span>(<span class="pl-c1">10</span>) <span class="pl-s1">st</span>.<span class="pl-c1">CliffsDelta</span>(<span class="pl-s1">corpus</span>).<span class="pl-c1">set_categories</span>(<span class="pl-s">'Dem'</span>).<span class="pl-c1">get_score_df</span>().<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'Dem'</span>, <span class="pl-s1">ascending</span><span class="pl-c1">=</span><span class="pl-c1">False</span>).<span class="pl-c1">iloc</span>[:<span class="pl-c1">10</span>]</pre></div> <markdown-accessiblity-table><table> <thead> <tr> <th align="left">term</th> <th align="right">Metric</th> <th align="right">Stddev</th> <th align="right">Low-5.0% CI</th> <th align="right">High-5.0% CI</th> <th align="right">TermCount1</th> <th align="right">TermCount2</th> <th align="right">DocCount1</th> <th align="right">DocCount2</th> </tr> </thead> <tbody> <tr> <td align="left">obama</td> <td align="right">0.597191</td> <td align="right">0.0266606</td> <td align="right">-1.35507</td> <td 
align="right">-1.03477</td> <td align="right">537</td> <td align="right">165</td> <td align="right">113</td> <td align="right">40</td> </tr> <tr> <td align="left">president obama</td> <td align="right">0.565903</td> <td align="right">0.0314348</td> <td align="right">-2.37978</td> <td align="right">-1.74131</td> <td align="right">351</td> <td align="right">78</td> <td align="right">100</td> <td align="right">30</td> </tr> <tr> <td align="left">president</td> <td align="right">0.426337</td> <td align="right">0.0293418</td> <td align="right">1.22784</td> <td align="right">0.909226</td> <td align="right">740</td> <td align="right">301</td> <td align="right">113</td> <td align="right">53</td> </tr> <tr> <td align="left">middle</td> <td align="right">0.417591</td> <td align="right">0.0267365</td> <td align="right">1.10791</td> <td align="right">0.840932</td> <td align="right">164</td> <td align="right">27</td> <td align="right">68</td> <td align="right">12</td> </tr> <tr> <td align="left">class</td> <td align="right">0.415373</td> <td align="right">0.0280622</td> <td align="right">1.09032</td> <td align="right">0.815649</td> <td align="right">161</td> <td align="right">25</td> <td align="right">69</td> <td align="right">14</td> </tr> <tr> <td align="left">barack</td> <td align="right">0.406997</td> <td align="right">0.0281692</td> <td align="right">1.00765</td> <td align="right">0.750963</td> <td align="right">202</td> <td align="right">46</td> <td align="right">76</td> <td align="right">16</td> </tr> <tr> <td align="left">barack obama</td> <td align="right">0.402562</td> <td align="right">0.027512</td> <td align="right">0.965359</td> <td align="right">0.723403</td> <td align="right">164</td> <td align="right">45</td> <td align="right">76</td> <td align="right">16</td> </tr> <tr> <td align="left">that 's</td> <td align="right">0.384085</td> <td align="right">0.0227344</td> <td align="right">0.809747</td> <td align="right">0.634705</td> <td align="right">236</td> <td 
align="right">91</td> <td align="right">89</td> <td align="right">31</td> </tr> <tr> <td align="left">obama .</td> <td align="right">0.356245</td> <td align="right">0.0237453</td> <td align="right">0.664688</td> <td align="right">0.509631</td> <td align="right">70</td> <td align="right">5</td> <td align="right">49</td> <td align="right">4</td> </tr> <tr> <td align="left">for</td> <td align="right">0.35526</td> <td align="right">0.0364138</td> <td align="right">0.70142</td> <td align="right">0.46487</td> <td align="right">1020</td> <td align="right">542</td> <td align="right">119</td> <td align="right">62</td> </tr> </tbody> </table></markdown-accessiblity-table> <p dir="auto">We can elegantly display the Cliff's delta scores using <code>dataframe_scattertext</code>, and describe the point coloring scheme using the <code>include_gradient=True</code> parameter. We set the <code>left_gradient_term</code>, <code>middle_gradient_term</code>, and <code>right_gradient_term</code> parameters to strings which will appear in their corresponding values.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="plot_df = st.CliffsDelta( corpus ).set_categories( category_name='Dem' ).get_score_df().rename(columns={'Metric': 'CliffsDelta'}).assign( Frequency=lambda df: df.TermCount1 + df.TermCount1, X=lambda df: df.Frequency, Y=lambda df: df.CliffsDelta, Xpos=lambda df: st.Scalers.dense_rank(df.X), Ypos=lambda df: st.Scalers.scale_center_zero_abs(df.Y), ColorScore=lambda df: df.Ypos, ) html = st.dataframe_scattertext( corpus, plot_df=plot_df, category='Dem', category_name='Dem', not_category_name='Rep', width_in_pixels=1000, ignore_categories=False, metadata=lambda corpus: corpus.get_df()['speaker'], color_score_column='ColorScore', left_list_column='ColorScore', show_characteristic=False, y_label=&quot;Cliff's Delta&quot;, x_label='Frequency Ranks', y_axis_labels=[f'More Rep: 
delta={plot_df.CliffsDelta.max():.3f}', '', f'More Dem: delta={-plot_df.CliffsDelta.max():.3f}'], tooltip_columns=['Frequency', 'CliffsDelta'], term_description_columns=['CliffsDelta', 'Stddev', 'Low-95.0% CI', 'High-95.0% CI'], header_names={'upper': 'Top Dem', 'lower': 'Top Reps'}, horizontal_line_y_position=0, include_gradient=True, left_gradient_term='More Republican', right_gradient_term='More Democratic', middle_gradient_term=&quot;Metric: Cliff's Delta&quot;, )"><pre><span class="pl-s1">plot_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CliffsDelta</span>( <span class="pl-s1">corpus</span> ).<span class="pl-c1">set_categories</span>( <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Dem'</span> ).<span class="pl-c1">get_score_df</span>().<span class="pl-c1">rename</span>(<span class="pl-s1">columns</span><span class="pl-c1">=</span>{<span class="pl-s">'Metric'</span>: <span class="pl-s">'CliffsDelta'</span>}).<span class="pl-c1">assign</span>( <span class="pl-v">Frequency</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">TermCount1</span> <span class="pl-c1">+</span> <span class="pl-s1">df</span>.<span class="pl-c1">TermCount1</span>, <span class="pl-c1">X</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">Frequency</span>, <span class="pl-c1">Y</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">CliffsDelta</span>, <span class="pl-v">Xpos</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">dense_rank</span>(<span class="pl-s1">df</span>.<span 
class="pl-c1">X</span>), <span class="pl-v">Ypos</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">scale_center_zero_abs</span>(<span class="pl-s1">df</span>.<span class="pl-c1">Y</span>), <span class="pl-v">ColorScore</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">Ypos</span>, ) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">dataframe_scattertext</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">plot_df</span><span class="pl-c1">=</span><span class="pl-s1">plot_df</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'Dem'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Dem'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Rep'</span>, <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">ignore_categories</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">corpus</span>: <span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>], <span class="pl-s1">color_score_column</span><span class="pl-c1">=</span><span class="pl-s">'ColorScore'</span>, <span class="pl-s1">left_list_column</span><span class="pl-c1">=</span><span class="pl-s">'ColorScore'</span>, <span class="pl-s1">show_characteristic</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">"Cliff's Delta"</span>, <span 
class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Frequency Ranks'</span>, <span class="pl-s1">y_axis_labels</span><span class="pl-c1">=</span>[<span class="pl-s">f'More Rep: delta=<span class="pl-s1"><span class="pl-kos">{</span><span class="pl-s1">plot_df</span>.<span class="pl-c1">CliffsDelta</span>.<span class="pl-c1">max</span>():.3f<span class="pl-kos">}</span></span>'</span>, <span class="pl-s">''</span>, <span class="pl-s">f'More Dem: delta=<span class="pl-s1"><span class="pl-kos">{</span><span class="pl-c1">-</span><span class="pl-s1">plot_df</span>.<span class="pl-c1">CliffsDelta</span>.<span class="pl-c1">max</span>():.3f<span class="pl-kos">}</span></span>'</span>], <span class="pl-s1">tooltip_columns</span><span class="pl-c1">=</span>[<span class="pl-s">'Frequency'</span>, <span class="pl-s">'CliffsDelta'</span>], <span class="pl-s1">term_description_columns</span><span class="pl-c1">=</span>[<span class="pl-s">'CliffsDelta'</span>, <span class="pl-s">'Stddev'</span>, <span class="pl-s">'Low-95.0% CI'</span>, <span class="pl-s">'High-95.0% CI'</span>], <span class="pl-s1">header_names</span><span class="pl-c1">=</span>{<span class="pl-s">'upper'</span>: <span class="pl-s">'Top Dem'</span>, <span class="pl-s">'lower'</span>: <span class="pl-s">'Top Reps'</span>}, <span class="pl-s1">horizontal_line_y_position</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">include_gradient</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">left_gradient_term</span><span class="pl-c1">=</span><span class="pl-s">'More Republican'</span>, <span class="pl-s1">right_gradient_term</span><span class="pl-c1">=</span><span class="pl-s">'More Democratic'</span>, <span class="pl-s1">middle_gradient_term</span><span class="pl-c1">=</span><span class="pl-s">"Metric: Cliff's Delta"</span>, )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_cliffs_delta.html" 
rel="nofollow"><img src="https://camo.githubusercontent.com/80f777c69f1230b1ba3538c2f03e57e2498b44d4f99593a64f525ea426dbe1f7/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f636c6966667364656c74612e706e67" alt="demo_cliffs_delta.html" data-canonical-src="https://jasonkessler.github.io/cliffsdelta.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using Bi-Normal Separation (BNS) to score terms</h3><a id="user-content-using-bi-normal-separation-bns-to-score-terms" class="anchor" aria-label="Permalink: Using Bi-Normal Separation (BNS) to score terms" href="#using-bi-normal-separation-bns-to-score-terms"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Bi-Normal Separation (BNS) (Forman, 2008) was added in version 0.1.8. A variation of (BNS) is used where <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$F^{-1}(tpr) - F^{-1}(fpr)$</math-renderer> is not used as an absolute value, but kept as a difference. This allows for terms strongly indicative of true positives and false positives to have a high or low score. 
Note that tpr and fpr are scaled to between <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$[\alpha, 1-\alpha]$</math-renderer> where alpha is <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$\in [0, 1]$</math-renderer>. In Forman (2008) and earlier literature <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$\alpha=0.0005$</math-renderer>. In personal correspondence with Forman, he kindly suggested using <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$\frac{1.}{\mbox{minimum(positives, negatives)}}$</math-renderer>. 
I have implemented this as <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$\alpha=\frac{1.}{\mbox{minimum documents in the least frequent category}}$</math-renderer></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="corpus = (st.CorpusFromPandas(convention_df, category_col='party', text_col='text', nlp=st.whitespace_nlp_with_sentences) .build() .get_unigram_corpus() .remove_infrequent_words(3, term_ranker=st.OncePerDocFrequencyRanker)) term_scorer = (st.BNSScorer(corpus).set_categories('democrat')) print(term_scorer.get_score_df().sort_values(by='democrat BNS')) html = st.produce_frequency_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', scores=term_scorer.get_score_df()['democrat BNS'].reindex(corpus.get_terms()).values, metadata=lambda c: c.get_df()['speaker'], minimum_term_frequency=0, grey_threshold=0, y_label=f'Bi-normal Separation (alpha={term_scorer.prior_counts})' )"><pre><span class="pl-s1">corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromPandas</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">text_col</span><span class="pl-c1">=</span><span class="pl-s">'text'</span>, <span class="pl-s1">nlp</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) .<span class="pl-c1">build</span>() .<span class="pl-c1">get_unigram_corpus</span>() .<span class="pl-c1">remove_infrequent_words</span>(<span class="pl-c1">3</span>, <span class="pl-s1">term_ranker</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span 
class="pl-c1">OncePerDocFrequencyRanker</span>)) <span class="pl-s1">term_scorer</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">BNSScorer</span>(<span class="pl-s1">corpus</span>).<span class="pl-c1">set_categories</span>(<span class="pl-s">'democrat'</span>)) <span class="pl-en">print</span>(<span class="pl-s1">term_scorer</span>.<span class="pl-c1">get_score_df</span>().<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'democrat BNS'</span>)) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_frequency_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">scores</span><span class="pl-c1">=</span><span class="pl-s1">term_scorer</span>.<span class="pl-c1">get_score_df</span>()[<span class="pl-s">'democrat BNS'</span>].<span class="pl-c1">reindex</span>(<span class="pl-s1">corpus</span>.<span class="pl-c1">get_terms</span>()).<span class="pl-c1">values</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">c</span>: <span class="pl-s1">c</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>], <span class="pl-s1">minimum_term_frequency</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">grey_threshold</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">f'Bi-normal Separation (alpha=<span class="pl-s1"><span class="pl-kos">{</span><span 
class="pl-s1">term_scorer</span>.<span class="pl-c1">prior_counts</span><span class="pl-kos">}</span></span>)'</span> )</pre></div> <p dir="auto">BNS-scored terms using an algorithmically found alpha. <a href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_bi_normal_separation.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_bi_normal_separation.png" alt="BNS" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using correlations to explain classifiers</h3><a id="user-content-using-correlations-to-explain-classifiers" class="anchor" aria-label="Permalink: Using correlations to explain classifiers" href="#using-correlations-to-explain-classifiers"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We can train a classifier to produce a prediction score for each document. 
Often classifiers or regressors take into account features beyond the ones represented by Scattertext, be they n-gram, topic, extra-linguistic, neural, etc.</p> <p dir="auto">We can use Scattertext to visualize the correlations between unigrams (or really any feature representation) and the document scores produced by a model.</p> <p dir="auto">In the following example, we train a linear SVM using unigram and bi-gram features on the entire convention data set, use the model to make a prediction on each document, and finally use Pearson's <math-renderer class="js-inline-math" style="display: inline-block" data-static-url="https://github.githubassets.com/static" data-run-id="bf8025d7258a64274065d81987021fdd">$r$</math-renderer> to correlate unigram features to the distance from the SVM decision boundary.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="from sklearn.svm import LinearSVC import scattertext as st df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ) corpus = st.CorpusFromParsedDocuments( df, category_col='party', parsed_col='parse' ).build() X = corpus.get_term_doc_mat() y = corpus.get_category_ids() clf = LinearSVC() clf.fit(X=X, y=y == corpus.get_categories().index('democrat')) doc_scores = clf.decision_function(X=X) compactcorpus = corpus.get_unigram_corpus().compact(st.AssociationCompactor(2000)) plot_df = st.Correlations().set_correlation_type( 'pearsonr' ).get_correlation_df( corpus=compactcorpus, document_scores=doc_scores ).reindex(compactcorpus.get_terms()).assign( X=lambda df: df.Frequency, Y=lambda df: df['r'], Xpos=lambda df: st.Scalers.dense_rank(df.X), Ypos=lambda df: st.Scalers.scale_center_zero_abs(df.Y), SuppressDisplay=False, ColorScore=lambda df: df.Ypos, ) html = st.dataframe_scattertext( compactcorpus, plot_df=plot_df, category='democrat', 
category_name='Democratic', not_category_name='Republican', width_in_pixels=1000, metadata=lambda c: c.get_df()['speaker'], unified_context=False, ignore_categories=False, color_score_column='ColorScore', left_list_column='ColorScore', y_label=&quot;Pearson r (correlation to SVM document score)&quot;, x_label='Frequency Ranks', header_names={'upper': 'Top Democratic', 'lower': 'Top Republican'}, )"><pre><span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">svm</span> <span class="pl-k">import</span> <span class="pl-v">LinearSVC</span> <span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>().<span class="pl-c1">assign</span>( <span class="pl-s1">parse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) ) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>( <span class="pl-s1">df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span> ).<span class="pl-c1">build</span>() <span class="pl-c1">X</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_term_doc_mat</span>() <span class="pl-s1">y</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_category_ids</span>() <span class="pl-s1">clf</span> <span 
class="pl-c1">=</span> <span class="pl-en">LinearSVC</span>() <span class="pl-s1">clf</span>.<span class="pl-c1">fit</span>(<span class="pl-c1">X</span><span class="pl-c1">=</span><span class="pl-c1">X</span>, <span class="pl-s1">y</span><span class="pl-c1">=</span><span class="pl-s1">y</span> <span class="pl-c1">==</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_categories</span>().<span class="pl-c1">index</span>(<span class="pl-s">'democrat'</span>)) <span class="pl-s1">doc_scores</span> <span class="pl-c1">=</span> <span class="pl-s1">clf</span>.<span class="pl-c1">decision_function</span>(<span class="pl-c1">X</span><span class="pl-c1">=</span><span class="pl-c1">X</span>) <span class="pl-s1">compactcorpus</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_unigram_corpus</span>().<span class="pl-c1">compact</span>(<span class="pl-s1">st</span>.<span class="pl-c1">AssociationCompactor</span>(<span class="pl-c1">2000</span>)) <span class="pl-s1">plot_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">Correlations</span>().<span class="pl-c1">set_correlation_type</span>( <span class="pl-s">'pearsonr'</span> ).<span class="pl-c1">get_correlation_df</span>( <span class="pl-s1">corpus</span><span class="pl-c1">=</span><span class="pl-s1">compactcorpus</span>, <span class="pl-s1">document_scores</span><span class="pl-c1">=</span><span class="pl-s1">doc_scores</span> ).<span class="pl-c1">reindex</span>(<span class="pl-s1">compactcorpus</span>.<span class="pl-c1">get_terms</span>()).<span class="pl-c1">assign</span>( <span class="pl-c1">X</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">Frequency</span>, <span class="pl-c1">Y</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>[<span 
class="pl-s">'r'</span>], <span class="pl-v">Xpos</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">dense_rank</span>(<span class="pl-s1">df</span>.<span class="pl-c1">X</span>), <span class="pl-v">Ypos</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">scale_center_zero_abs</span>(<span class="pl-s1">df</span>.<span class="pl-c1">Y</span>), <span class="pl-v">SuppressDisplay</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-v">ColorScore</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">Ypos</span>, ) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">dataframe_scattertext</span>( <span class="pl-s1">compactcorpus</span>, <span class="pl-s1">plot_df</span><span class="pl-c1">=</span><span class="pl-s1">plot_df</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">c</span>: <span class="pl-s1">c</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>], <span class="pl-s1">unified_context</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">ignore_categories</span><span 
class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">color_score_column</span><span class="pl-c1">=</span><span class="pl-s">'ColorScore'</span>, <span class="pl-s1">left_list_column</span><span class="pl-c1">=</span><span class="pl-s">'ColorScore'</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">"Pearson r (correlation to SVM document score)"</span>, <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Frequency Ranks'</span>, <span class="pl-s1">header_names</span><span class="pl-c1">=</span>{<span class="pl-s">'upper'</span>: <span class="pl-s">'Top Democratic'</span>, <span class="pl-s">'lower'</span>: <span class="pl-s">'Top Republican'</span>}, )</pre></div> <p dir="auto"><a href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/pearsons.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/pearsons.png" alt="BNS" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using Custom Background Word Frequencies</h3><a id="user-content-using-custom-background-word-frequencies" class="anchor" aria-label="Permalink: Using Custom Background Word Frequencies" href="#using-custom-background-word-frequencies"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 
0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Scattertext relies on a set of general-domain English word frequencies when computing unigram characteristic<br> scores. When running Scattertext on non-English data or in a specific domain, the quality of the scores will degrade.</p> <p dir="auto">Ensure that you are on Scattertext 0.1.6 or higher.</p> <p dir="auto">To remedy this, one can add a custom set of background scores to a Corpus-like object, using the <code>Corpus.set_background_corpus</code> function. The function takes a <code>pd.Series</code> object, indexed on terms with numeric count values.</p> <p dir="auto">By default, <a href="#understanding-scaled-f-score">Scaled F-Score</a> is used to rank how characteristic terms are.</p> <p dir="auto">The example below illustrates using Polish background word frequencies.</p> <p dir="auto">First, we produce a Series object mapping Polish words to their frequencies using a list from the <a href="https://github.com/oprogramador/most-common-words-by-language" rel="nofollow">most-common-words-by-language</a> repo.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="polish_word_frequencies = pd.read_csv( 'https://raw.githubusercontent.com/hermitdave/FrequencyWords/master/content/2016/pl/pl_50k.txt', sep=' ', names=['Word', 'Frequency'] ).set_index('Word')['Frequency']"><pre><span class="pl-s1">polish_word_frequencies</span> <span class="pl-c1">=</span> <span class="pl-s1">pd</span>.<span class="pl-c1">read_csv</span>( <span class="pl-s">'https://raw.githubusercontent.com/hermitdave/FrequencyWords/master/content/2016/pl/pl_50k.txt'</span>, <span class="pl-s1">sep</span><span class="pl-c1">=</span><span class="pl-s">' '</span>, <span class="pl-s1">names</span><span class="pl-c1">=</span>[<span class="pl-s">'Word'</span>, <span class="pl-s">'Frequency'</span>] ).<span class="pl-c1">set_index</span>(<span 
class="pl-s">'Word'</span>)[<span class="pl-s">'Frequency'</span>]</pre></div> <p dir="auto">Note the composition of the Series</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="&gt;&gt; &gt; polish_word_frequencies Word nie 5875385 to 4388099 się 3507076 w 2723767 na 2309765 Name: Frequency, dtype: int64"><pre><span class="pl-c1">&gt;&gt;</span> <span class="pl-c1">&gt;</span> <span class="pl-s1">polish_word_frequencies</span> <span class="pl-v">Word</span> <span class="pl-s1">nie</span> <span class="pl-c1">5875385</span> <span class="pl-s1">to</span> <span class="pl-c1">4388099</span> <span class="pl-s1">się</span> <span class="pl-c1">3507076</span> <span class="pl-s1">w</span> <span class="pl-c1">2723767</span> <span class="pl-s1">na</span> <span class="pl-c1">2309765</span> <span class="pl-v">Name</span>: <span class="pl-smi">Frequency</span>, <span class="pl-s1">dtype</span>: <span class="pl-smi">int64</span></pre></div> <p dir="auto">Next, we build a DataFrame, <code>review_df</code>, consisting of documents which appear (to a non-Polish speaker) to be positive and negative hotel reviews from the <a href="https://klejbenchmark.com/tasks/" rel="nofollow">PolEmo2.0</a> corpus (Kocoń, et al. 2019). Note this data is under a CC BY-NC-SA 4.0 license. These are labeled as "__label__meta_plus_m" and "__label__meta_minus_m". 
We will use Scattertext to compare those reviews and determine</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="nlp = spacy.blank('pl') nlp.add_pipe('sentencizer') with ZipFile(io.BytesIO(urlopen( 'https://klejbenchmark.com/static/data/klej_polemo2.0-in.zip' ).read())) as zf: review_df = pd.read_csv(zf.open('train.tsv'), sep='\t')[ lambda df: df.target.isin(['__label__meta_plus_m', '__label__meta_minus_m']) ].assign( Parse=lambda df: df.sentence.apply(nlp) )"><pre><span class="pl-s1">nlp</span> <span class="pl-c1">=</span> <span class="pl-s1">spacy</span>.<span class="pl-c1">blank</span>(<span class="pl-s">'pl'</span>) <span class="pl-s1">nlp</span>.<span class="pl-c1">add_pipe</span>(<span class="pl-s">'sentencizer'</span>) <span class="pl-k">with</span> <span class="pl-en">ZipFile</span>(<span class="pl-s1">io</span>.<span class="pl-c1">BytesIO</span>(<span class="pl-en">urlopen</span>( <span class="pl-s">'https://klejbenchmark.com/static/data/klej_polemo2.0-in.zip'</span> ).<span class="pl-c1">read</span>())) <span class="pl-k">as</span> <span class="pl-s1">zf</span>: <span class="pl-s1">review_df</span> <span class="pl-c1">=</span> <span class="pl-s1">pd</span>.<span class="pl-c1">read_csv</span>(<span class="pl-s1">zf</span>.<span class="pl-c1">open</span>(<span class="pl-s">'train.tsv'</span>), <span class="pl-s1">sep</span><span class="pl-c1">=</span><span class="pl-s">'<span class="pl-cce">\t</span>'</span>)[ <span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">target</span>.<span class="pl-c1">isin</span>([<span class="pl-s">'__label__meta_plus_m'</span>, <span class="pl-s">'__label__meta_minus_m'</span>]) ].<span class="pl-c1">assign</span>( <span class="pl-v">Parse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span 
class="pl-c1">sentence</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">nlp</span>) )</pre></div> <p dir="auto">Next, we wish to create a <code>ParsedCorpus</code> object from <code>review_df</code>. In preparation, we first assemble a list of Polish stopwords from the <a href="https://github.com/bieli/stopwords/">stopwords</a> repository. We also create the <code>not_a_word</code> regular expression to filter out terms which do not contain a letter.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="polish_stopwords = { stopword for stopword in urlopen( 'https://raw.githubusercontent.com/bieli/stopwords/master/polish.stopwords.txt' ).read().decode('utf-8').split('\n') if stopword.strip() } not_a_word = re.compile(r'^\W+$')"><pre><span class="pl-s1">polish_stopwords</span> <span class="pl-c1">=</span> { <span class="pl-s1">stopword</span> <span class="pl-k">for</span> <span class="pl-s1">stopword</span> <span class="pl-c1">in</span> <span class="pl-en">urlopen</span>( <span class="pl-s">'https://raw.githubusercontent.com/bieli/stopwords/master/polish.stopwords.txt'</span> ).<span class="pl-c1">read</span>().<span class="pl-c1">decode</span>(<span class="pl-s">'utf-8'</span>).<span class="pl-c1">split</span>(<span class="pl-s">'<span class="pl-cce">\n</span>'</span>) <span class="pl-k">if</span> <span class="pl-s1">stopword</span>.<span class="pl-c1">strip</span>() } <span class="pl-s1">not_a_word</span> <span class="pl-c1">=</span> <span class="pl-s1">re</span>.<span class="pl-c1">compile</span>(<span class="pl-s">r'^\W+$'</span>)</pre></div> <p dir="auto">With these present, we can build a corpus from <code>review_df</code> with the category being the binary "target" column. We reduce the term space to unigrams and then run the <code>filter_out</code> which takes a function to determine if a term should be removed from the corpus. 
The function identifies terms which are in the Polish stoplist or do not contain a letter. Finally, terms occurring fewer than 20 times in the corpus are removed.</p> <p dir="auto">We set the background frequency Series we created earlier as the background corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="corpus = st.CorpusFromParsedDocuments( review_df, category_col='target', parsed_col='Parse' ).build( ).get_unigram_corpus( ).filter_out( lambda term: term in polish_stopwords or not_a_word.match(term) is not None ).remove_infrequent_words( minimum_term_count=20 ).set_background_corpus( polish_word_frequencies )"><pre><span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>( <span class="pl-s1">review_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'target'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'Parse'</span> ).<span class="pl-c1">build</span>( ).<span class="pl-c1">get_unigram_corpus</span>( ).<span class="pl-c1">filter_out</span>( <span class="pl-k">lambda</span> <span class="pl-s1">term</span>: <span class="pl-s1">term</span> <span class="pl-c1">in</span> <span class="pl-s1">polish_stopwords</span> <span class="pl-c1">or</span> <span class="pl-s1">not_a_word</span>.<span class="pl-c1">match</span>(<span class="pl-s1">term</span>) <span class="pl-c1"><span class="pl-c1">is</span> <span class="pl-c1">not</span></span> <span class="pl-c1">None</span> ).<span class="pl-c1">remove_infrequent_words</span>( <span class="pl-s1">minimum_term_count</span><span class="pl-c1">=</span><span class="pl-c1">20</span> ).<span class="pl-c1">set_background_corpus</span>( <span class="pl-s1">polish_word_frequencies</span> )</pre></div> <p dir="auto">Note that a minimum word count of 20 was chosen 
to ensure that only around 2,000 terms would be displayed</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="&gt;&gt; &gt; corpus.get_num_terms() 2023"><pre><span class="pl-c1">&gt;&gt;</span> <span class="pl-c1">&gt;</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_num_terms</span>() <span class="pl-c1">2023</span></pre></div> <p dir="auto">Running <code>get_term_and_background_counts</code> shows us total term counts in the corpus compared to background frequency counts. We limit this to terms which only occur in the corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="&gt;&gt; &gt; corpus.get_term_and_background_counts()[ ... lambda df: df.corpus &gt; 0 ...].sort_values(by='corpus', ascending=False) background corpus m 341583838.0 4819.0 hotelu 33108.0 1812.0 hotel 297974790.0 1651.0 doktor 154840.0 1534.0 polecam 0.0 1438.0 ......... szoku 0.0 21.0 badaniem 0.0 21.0 balkonu 0.0 21.0 stopnia 0.0 21.0 wobec 0.0 21.0"><pre><span class="pl-c1">&gt;&gt;</span> <span class="pl-c1">&gt;</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_term_and_background_counts</span>()[ ... 
<span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">corpus</span> <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span> ...].<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'corpus'</span>, <span class="pl-s1">ascending</span><span class="pl-c1">=</span><span class="pl-c1">False</span>) <span class="pl-s1">background</span> <span class="pl-s1">corpus</span> <span class="pl-s1">m</span> <span class="pl-c1">341583838.0</span> <span class="pl-c1">4819.0</span> <span class="pl-s1">hotelu</span> <span class="pl-c1">33108.0</span> <span class="pl-c1">1812.0</span> <span class="pl-s1">hotel</span> <span class="pl-c1">297974790.0</span> <span class="pl-c1">1651.0</span> <span class="pl-s1">doktor</span> <span class="pl-c1">154840.0</span> <span class="pl-c1">1534.0</span> <span class="pl-s1">polecam</span> <span class="pl-c1">0.0</span> <span class="pl-c1">1438.0</span> ......... <span class="pl-c1">szoku</span> <span class="pl-c1">0.0</span> <span class="pl-c1">21.0</span> <span class="pl-s1">badaniem</span> <span class="pl-c1">0.0</span> <span class="pl-c1">21.0</span> <span class="pl-s1">balkonu</span> <span class="pl-c1">0.0</span> <span class="pl-c1">21.0</span> <span class="pl-s1">stopnia</span> <span class="pl-c1">0.0</span> <span class="pl-c1">21.0</span> <span class="pl-s1">wobec</span> <span class="pl-c1">0.0</span> <span class="pl-c1">21.0</span></pre></div> <p dir="auto">Interesting, the term "polecam" appears very frequently in the corpus, but does not appear at all in the background corpus, making it highly characteristic. 
Judging from Google Translate, it appears to mean something related to "recommend".</p> <p dir="auto">We are now ready to display the plot.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_scattertext_explorer( corpus, category='__label__meta_plus_m', category_name='Plus-M', not_category_name='Minus-M', minimum_term_frequency=1, width_in_pixels=1000, transform=st.Scalers.dense_rank )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'__label__meta_plus_m'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Plus-M'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Minus-M'</span>, <span class="pl-s1">minimum_term_frequency</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">transform</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">dense_rank</span> )</pre></div> <p dir="auto"><a href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_pos_neg_scattertext.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_pos_neg_scattertext.png" alt="Polish Scattertext" style="max-width: 100%;"></a></p> <p dir="auto">We can change the formula which is used to produce the Characteristic scores using the <code>characteristic_scorer</code> parameter to <code>produce_scattertext_explorer</code>.</p> <p dir="auto">It takes an instance of a descendant of the 
<code>CharacteristicScorer</code> class. See <a href="https://github.com/JasonKessler/scattertext/blob/8ddff82f670aa2ed40312b2cdd077e7f0a98a873/scattertext/characteristic/DenseRankCharacteristicness.py#L36">DenseRankCharacteristicness.py</a> for an example of how to make your own.</p> <p dir="auto">Example of plotting with a modified characteristic scorer,</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_scattertext_explorer( corpus, category='__label__meta_plus_m', category_name='Plus-M', not_category_name='Minus-M', minimum_term_frequency=1, transform=st.Scalers.dense_rank, characteristic_scorer=st.DenseRankCharacteristicness(), term_ranker=st.termranking.AbsoluteFrequencyRanker, term_scorer=st.ScaledFScorePresets(beta=1, one_to_neg_one=True) ).encode('utf-8')) print('open ' + fn) "><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'__label__meta_plus_m'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Plus-M'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Minus-M'</span>, <span class="pl-s1">minimum_term_frequency</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">transform</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">dense_rank</span>, <span class="pl-s1">characteristic_scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">DenseRankCharacteristicness</span>(), <span class="pl-s1">term_ranker</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span 
class="pl-c1">termranking</span>.<span class="pl-c1">AbsoluteFrequencyRanker</span>, <span class="pl-s1">term_scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">ScaledFScorePresets</span>(<span class="pl-s1">beta</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">one_to_neg_one</span><span class="pl-c1">=</span><span class="pl-c1">True</span>) ).<span class="pl-c1">encode</span>(<span class="pl-s">'utf-8'</span>)) <span class="pl-k">print</span>(<span class="pl-s">'open '</span> <span class="pl-c1">+</span> <span class="pl-s1">fn</span>)</pre></div> <p dir="auto"><a href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_dense_rank_characteristic.png" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/polish_dense_rank_characteristic.png" alt="Polish Scattertext DenseRank" style="max-width: 100%;"></a></p> <p dir="auto">Note that numbers show up as more characteristic using the Dense Rank Difference. 
It may be they occur unusually frequently in this corpus, or perhaps the background word frequencies undercounted numbers.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Plotting word productivity</h3><a id="user-content-plotting-word-productivity" class="anchor" aria-label="Permalink: Plotting word productivity" href="#plotting-word-productivity"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Word productivity is one strategy for plotting word-based charts describing an uncategorized corpus.</p> <p dir="auto">Productivity is defined in Schumann (2016) (Jason: check this) as the entropy of ngrams which contain a term. For the entropy computation, the probability of an n-gram wrt the term whose productivity is being calculated is the frequency of the n-gram divided by the term's frequency.</p> <p dir="auto">Since productivity highly correlates with frequency, the recommended metric to plot is the dense rank difference between frequency and productivity.</p> <p dir="auto">The snippet below plots words in the convention corpus based on their log frequency and their productivity.</p> <p dir="auto">The function <code>st.whole_corpus_productivity_scores</code> returns a DataFrame giving each word's productivity. 
For example, in the convention corpus,</p> <p dir="auto">Productivity scores should be calculated on a <code>Corpus</code>-like object which contains a complete set of unigrams and at least bigrams. This corpus should not be compacted before the productivity score calculation.</p> <p dir="auto">The terms with lower productivity have more limited usage (e.g., "thank" for "thank you", "united" for "united states") while the terms with higher productivity occur in a wider variety of contexts ("getting", "actually", "political", etc.).</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import spacy import scattertext as st corpus_no_cat = st.CorpusWithoutCategoriesFromParsedDocuments( st.SampleCorpora.ConventionData2012.get_data().assign( Parse=lambda df: [x for x in spacy.load('en_core_web_sm').pipe(df.text)]), parsed_col='Parse' ).build() compact_corpus_no_cat = corpus_no_cat.get_stoplisted_unigram_corpus().remove_infrequent_words(9) plot_df = st.whole_corpus_productivity_scores(corpus_no_cat).assign( RankDelta=lambda df: st.RankDifference().get_scores( a=df.Productivity, b=df.Frequency ) ).reindex( compact_corpus_no_cat.get_terms() ).dropna().assign( X=lambda df: df.Frequency, Xpos=lambda df: st.Scalers.log_scale(df.Frequency), Y=lambda df: df.RankDelta, Ypos=lambda df: st.Scalers.scale(df.RankDelta), ) html = st.dataframe_scattertext( compact_corpus_no_cat.whitelist_terms(plot_df.index), plot_df=plot_df, metadata=lambda df: df.get_df()['speaker'], ignore_categories=True, x_label='Rank Frequency', y_label=&quot;Productivity&quot;, left_list_column='Ypos', color_score_column='Ypos', y_axis_labels=['Least Productive', 'Average Productivity', 'Most Productive'], header_names={'upper': 'Most Productive', 'lower': 'Least Productive', 'right': 'Characteristic'}, horizontal_line_y_position=0 ) "><pre><span class="pl-k">import</span> <span class="pl-s1">spacy</span> <span 
class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">corpus_no_cat</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusWithoutCategoriesFromParsedDocuments</span>( <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>().<span class="pl-c1">assign</span>( <span class="pl-v">Parse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: [<span class="pl-s1">x</span> <span class="pl-k">for</span> <span class="pl-s1">x</span> <span class="pl-c1">in</span> <span class="pl-s1">spacy</span>.<span class="pl-c1">load</span>(<span class="pl-s">'en_core_web_sm'</span>).<span class="pl-c1">pipe</span>(<span class="pl-s1">df</span>.<span class="pl-c1">text</span>)]), <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'Parse'</span> ).<span class="pl-c1">build</span>() <span class="pl-s1">compact_corpus_no_cat</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus_no_cat</span>.<span class="pl-c1">get_stoplisted_unigram_corpus</span>().<span class="pl-c1">remove_infrequent_words</span>(<span class="pl-c1">9</span>) <span class="pl-s1">plot_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">whole_corpus_productivity_scores</span>(<span class="pl-s1">corpus_no_cat</span>).<span class="pl-c1">assign</span>( <span class="pl-v">RankDelta</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">RankDifference</span>().<span class="pl-c1">get_scores</span>( <span class="pl-s1">a</span><span class="pl-c1">=</span><span class="pl-s1">df</span>.<span class="pl-c1">Productivity</span>, <span class="pl-s1">b</span><span class="pl-c1">=</span><span 
class="pl-s1">df</span>.<span class="pl-c1">Frequency</span> ) ).<span class="pl-c1">reindex</span>( <span class="pl-s1">compact_corpus_no_cat</span>.<span class="pl-c1">get_terms</span>() ).<span class="pl-c1">dropna</span>().<span class="pl-c1">assign</span>( <span class="pl-c1">X</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">Frequency</span>, <span class="pl-v">Xpos</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">log_scale</span>(<span class="pl-s1">df</span>.<span class="pl-c1">Frequency</span>), <span class="pl-c1">Y</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">RankDelta</span>, <span class="pl-v">Ypos</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">scale</span>(<span class="pl-s1">df</span>.<span class="pl-c1">RankDelta</span>), ) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">dataframe_scattertext</span>( <span class="pl-s1">compact_corpus_no_cat</span>.<span class="pl-c1">whitelist_terms</span>(<span class="pl-s1">plot_df</span>.<span class="pl-c1">index</span>), <span class="pl-s1">plot_df</span><span class="pl-c1">=</span><span class="pl-s1">plot_df</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>], <span class="pl-s1">ignore_categories</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">x_label</span><span 
class="pl-c1">=</span><span class="pl-s">'Rank Frequency'</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">"Productivity"</span>, <span class="pl-s1">left_list_column</span><span class="pl-c1">=</span><span class="pl-s">'Ypos'</span>, <span class="pl-s1">color_score_column</span><span class="pl-c1">=</span><span class="pl-s">'Ypos'</span>, <span class="pl-s1">y_axis_labels</span><span class="pl-c1">=</span>[<span class="pl-s">'Least Productive'</span>, <span class="pl-s">'Average Productivity'</span>, <span class="pl-s">'Most Productive'</span>], <span class="pl-s1">header_names</span><span class="pl-c1">=</span>{<span class="pl-s">'upper'</span>: <span class="pl-s">'Most Productive'</span>, <span class="pl-s">'lower'</span>: <span class="pl-s">'Least Productive'</span>, <span class="pl-s">'right'</span>: <span class="pl-s">'Characteristic'</span>}, <span class="pl-s1">horizontal_line_y_position</span><span class="pl-c1">=</span><span class="pl-c1">0</span> )</pre></div> <p dir="auto"><a href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/convention_single_category_productivity.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/convention_single_category_productivity.png" alt="Productivity" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Understanding Scaled F-Score</h3><a id="user-content-understanding-scaled-f-score" class="anchor" aria-label="Permalink: Understanding Scaled F-Score" href="#understanding-scaled-f-score"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 
1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Let's now turn our attention to a novel term scoring metric, Scaled F-Score. We'll examine this on a unigram version of the Rotten Tomatoes corpus (Pang et al. 2002). It contains excerpts of positive and negative movie reviews.</p> <p dir="auto">Please see <a href="http://nbviewer.jupyter.org/github/JasonKessler/GlobalAI2018/blob/master/notebook/Scaled-F-Score-Explanation.ipynb" rel="nofollow">Scaled F Score Explanation</a> for a notebook version of this analysis.</p> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs1.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs1.png" alt="Scaled F-Score Explanation 1" style="max-width: 100%;"></a></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="from scipy.stats import hmean term_freq_df = corpus.get_unigram_corpus().get_term_freq_df()[['Positive freq', 'Negative freq']] term_freq_df = term_freq_df[term_freq_df.sum(axis=1) &gt; 0] term_freq_df['pos_precision'] = (term_freq_df['Positive freq'] * 1. / (term_freq_df['Positive freq'] + term_freq_df['Negative freq'])) term_freq_df['pos_freq_pct'] = (term_freq_df['Positive freq'] * 1. 
/ term_freq_df['Positive freq'].sum()) term_freq_df['pos_hmean'] = (term_freq_df .apply(lambda x: (hmean([x['pos_precision'], x['pos_freq_pct']]) if x['pos_precision'] &gt; 0 and x['pos_freq_pct'] &gt; 0 else 0), axis=1)) term_freq_df.sort_values(by='pos_hmean', ascending=False).iloc[:10]"><pre><span class="pl-k">from</span> <span class="pl-s1">scipy</span>.<span class="pl-s1">stats</span> <span class="pl-k">import</span> <span class="pl-s1">hmean</span> <span class="pl-s1">term_freq_df</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_unigram_corpus</span>().<span class="pl-c1">get_term_freq_df</span>()[[<span class="pl-s">'Positive freq'</span>, <span class="pl-s">'Negative freq'</span>]] <span class="pl-s1">term_freq_df</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s1">term_freq_df</span>.<span class="pl-c1">sum</span>(<span class="pl-s1">axis</span><span class="pl-c1">=</span><span class="pl-c1">1</span>) <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span>] <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_precision'</span>] <span class="pl-c1">=</span> (<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Positive freq'</span>] <span class="pl-c1">*</span> <span class="pl-c1">1.</span> <span class="pl-c1">/</span> (<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Positive freq'</span>] <span class="pl-c1">+</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Negative freq'</span>])) <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_freq_pct'</span>] <span class="pl-c1">=</span> (<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Positive freq'</span>] <span class="pl-c1">*</span> <span class="pl-c1">1.</span> <span class="pl-c1">/</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Positive freq'</span>].<span class="pl-c1">sum</span>()) <span 
class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_hmean'</span>] <span class="pl-c1">=</span> (<span class="pl-s1">term_freq_df</span> .<span class="pl-c1">apply</span>(<span class="pl-k">lambda</span> <span class="pl-s1">x</span>: (<span class="pl-en">hmean</span>([<span class="pl-s1">x</span>[<span class="pl-s">'pos_precision'</span>], <span class="pl-s1">x</span>[<span class="pl-s">'pos_freq_pct'</span>]]) <span class="pl-k">if</span> <span class="pl-s1">x</span>[<span class="pl-s">'pos_precision'</span>] <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span> <span class="pl-c1">and</span> <span class="pl-s1">x</span>[<span class="pl-s">'pos_freq_pct'</span>] <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span> <span class="pl-k">else</span> <span class="pl-c1">0</span>), <span class="pl-s1">axis</span><span class="pl-c1">=</span><span class="pl-c1">1</span>)) <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'pos_hmean'</span>, <span class="pl-s1">ascending</span><span class="pl-c1">=</span><span class="pl-c1">False</span>).<span class="pl-c1">iloc</span>[:<span class="pl-c1">10</span>]</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs2.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs2.png" alt="SFS2" style="max-width: 100%;"></a></p> <p dir="auto">If we plot term frequency on the x-axis and the percentage of a term's occurrences which are in positive documents (i.e., its precision) on the y-axis, we can see that low-frequency terms have a much higher variation in the precision. Given these terms have low frequencies, the harmonic means are low. 
Thus, the only terms which have a high harmonic mean are extremely frequent words which tend to all have near average precisions.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="freq = term_freq_df.pos_freq_pct.values prec = term_freq_df.pos_precision.values html = st.produce_scattertext_explorer( corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)), category='Positive', not_category_name='Negative', not_categories=['Negative'], x_label='Portion of words used in positive reviews', original_x=freq, x_coords=(freq - freq.min()) / freq.max(), x_axis_values=[int(freq.min() * 1000) / 1000., int(freq.max() * 1000) / 1000.], y_label='Portion of documents containing word that are positive', original_y=prec, y_coords=(prec - prec.min()) / prec.max(), y_axis_values=[int(prec.min() * 1000) / 1000., int((prec.max() / 2.) * 1000) / 1000., int(prec.max() * 1000) / 1000.], scores=term_freq_df.pos_hmean.values, sort_by_dist=False, show_characteristic=False ) file_name = 'not_normed_freq_prec.html' open(file_name, 'wb').write(html.encode('utf-8')) IFrame(src=file_name, width=1300, height=700)"><pre><span class="pl-s1">freq</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_freq_pct</span>.<span class="pl-c1">values</span> <span class="pl-s1">prec</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_precision</span>.<span class="pl-c1">values</span> <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>.<span class="pl-c1">remove_terms</span>(<span class="pl-en">set</span>(<span class="pl-s1">corpus</span>.<span class="pl-c1">get_terms</span>()) <span class="pl-c1">-</span> <span class="pl-en">set</span>(<span class="pl-s1">term_freq_df</span>.<span 
class="pl-c1">index</span>)), <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'Positive'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Negative'</span>, <span class="pl-s1">not_categories</span><span class="pl-c1">=</span>[<span class="pl-s">'Negative'</span>], <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Portion of words used in positive reviews'</span>, <span class="pl-s1">original_x</span><span class="pl-c1">=</span><span class="pl-s1">freq</span>, <span class="pl-s1">x_coords</span><span class="pl-c1">=</span>(<span class="pl-s1">freq</span> <span class="pl-c1">-</span> <span class="pl-s1">freq</span>.<span class="pl-c1">min</span>()) <span class="pl-c1">/</span> <span class="pl-s1">freq</span>.<span class="pl-c1">max</span>(), <span class="pl-s1">x_axis_values</span><span class="pl-c1">=</span>[<span class="pl-en">int</span>(<span class="pl-s1">freq</span>.<span class="pl-c1">min</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>, <span class="pl-en">int</span>(<span class="pl-s1">freq</span>.<span class="pl-c1">max</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>], <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">'Portion of documents containing word that are positive'</span>, <span class="pl-s1">original_y</span><span class="pl-c1">=</span><span class="pl-s1">prec</span>, <span class="pl-s1">y_coords</span><span class="pl-c1">=</span>(<span class="pl-s1">prec</span> <span class="pl-c1">-</span> <span class="pl-s1">prec</span>.<span class="pl-c1">min</span>()) <span class="pl-c1">/</span> <span class="pl-s1">prec</span>.<span class="pl-c1">max</span>(), <span class="pl-s1">y_axis_values</span><span class="pl-c1">=</span>[<span 
class="pl-en">int</span>(<span class="pl-s1">prec</span>.<span class="pl-c1">min</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>, <span class="pl-en">int</span>((<span class="pl-s1">prec</span>.<span class="pl-c1">max</span>() <span class="pl-c1">/</span> <span class="pl-c1">2.</span>) <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>, <span class="pl-en">int</span>(<span class="pl-s1">prec</span>.<span class="pl-c1">max</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>], <span class="pl-s1">scores</span><span class="pl-c1">=</span><span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_hmean</span>.<span class="pl-c1">values</span>, <span class="pl-s1">sort_by_dist</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">show_characteristic</span><span class="pl-c1">=</span><span class="pl-c1">False</span> ) <span class="pl-s1">file_name</span> <span class="pl-c1">=</span> <span class="pl-s">'not_normed_freq_prec.html'</span> <span class="pl-en">open</span>(<span class="pl-s1">file_name</span>, <span class="pl-s">'wb'</span>).<span class="pl-c1">write</span>(<span class="pl-s1">html</span>.<span class="pl-c1">encode</span>(<span class="pl-s">'utf-8'</span>)) <span class="pl-en">IFrame</span>(<span class="pl-s1">src</span><span class="pl-c1">=</span><span class="pl-s1">file_name</span>, <span class="pl-s1">width</span><span class="pl-c1">=</span><span class="pl-c1">1300</span>, <span class="pl-s1">height</span><span class="pl-c1">=</span><span class="pl-c1">700</span>)</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs3.png"><img 
src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs3.png" alt="SFS3" style="max-width: 100%;"></a></p> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs4.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs4.png" alt="SFS4" style="max-width: 100%;"></a></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="from scipy.stats import norm def normcdf(x): return norm.cdf(x, x.mean(), x.std()) term_freq_df['pos_precision_normcdf'] = normcdf(term_freq_df.pos_precision) term_freq_df['pos_freq_pct_normcdf'] = normcdf(term_freq_df.pos_freq_pct.values) term_freq_df['pos_scaled_f_score'] = hmean( [term_freq_df['pos_precision_normcdf'], term_freq_df['pos_freq_pct_normcdf']]) term_freq_df.sort_values(by='pos_scaled_f_score', ascending=False).iloc[:10]"><pre><span class="pl-k">from</span> <span class="pl-s1">scipy</span>.<span class="pl-s1">stats</span> <span class="pl-k">import</span> <span class="pl-s1">norm</span> <span class="pl-k">def</span> <span class="pl-en">normcdf</span>(<span class="pl-s1">x</span>): <span class="pl-k">return</span> <span class="pl-s1">norm</span>.<span class="pl-c1">cdf</span>(<span class="pl-s1">x</span>, <span class="pl-s1">x</span>.<span class="pl-c1">mean</span>(), <span class="pl-s1">x</span>.<span class="pl-c1">std</span>()) <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_precision_normcdf'</span>] <span class="pl-c1">=</span> <span class="pl-en">normcdf</span>(<span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_precision</span>) <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_freq_pct_normcdf'</span>] <span class="pl-c1">=</span> <span class="pl-en">normcdf</span>(<span 
class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_freq_pct</span>.<span class="pl-c1">values</span>) <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_scaled_f_score'</span>] <span class="pl-c1">=</span> <span class="pl-en">hmean</span>( [<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_precision_normcdf'</span>], <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_freq_pct_normcdf'</span>]]) <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'pos_scaled_f_score'</span>, <span class="pl-s1">ascending</span><span class="pl-c1">=</span><span class="pl-c1">False</span>).<span class="pl-c1">iloc</span>[:<span class="pl-c1">10</span>]</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs5.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs5.png" alt="SFS5" style="max-width: 100%;"></a></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="freq = term_freq_df.pos_freq_pct_normcdf.values prec = term_freq_df.pos_precision_normcdf.values html = st.produce_scattertext_explorer( corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)), category='Positive', not_category_name='Negative', not_categories=['Negative'], x_label='Portion of words used in positive reviews (norm-cdf)', original_x=freq, x_coords=(freq - freq.min()) / freq.max(), x_axis_values=[int(freq.min() * 1000) / 1000., int(freq.max() * 1000) / 1000.], y_label='documents containing word that are positive (norm-cdf)', original_y=prec, y_coords=(prec - prec.min()) / prec.max(), y_axis_values=[int(prec.min() * 1000) / 1000., int((prec.max() / 2.) 
* 1000) / 1000., int(prec.max() * 1000) / 1000.], scores=term_freq_df.pos_scaled_f_score.values, sort_by_dist=False, show_characteristic=False )"><pre><span class="pl-s1">freq</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_freq_pct_normcdf</span>.<span class="pl-c1">values</span> <span class="pl-s1">prec</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_precision_normcdf</span>.<span class="pl-c1">values</span> <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>.<span class="pl-c1">remove_terms</span>(<span class="pl-en">set</span>(<span class="pl-s1">corpus</span>.<span class="pl-c1">get_terms</span>()) <span class="pl-c1">-</span> <span class="pl-en">set</span>(<span class="pl-s1">term_freq_df</span>.<span class="pl-c1">index</span>)), <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'Positive'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Negative'</span>, <span class="pl-s1">not_categories</span><span class="pl-c1">=</span>[<span class="pl-s">'Negative'</span>], <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Portion of words used in positive reviews (norm-cdf)'</span>, <span class="pl-s1">original_x</span><span class="pl-c1">=</span><span class="pl-s1">freq</span>, <span class="pl-s1">x_coords</span><span class="pl-c1">=</span>(<span class="pl-s1">freq</span> <span class="pl-c1">-</span> <span class="pl-s1">freq</span>.<span class="pl-c1">min</span>()) <span class="pl-c1">/</span> <span class="pl-s1">freq</span>.<span class="pl-c1">max</span>(), <span class="pl-s1">x_axis_values</span><span class="pl-c1">=</span>[<span class="pl-en">int</span>(<span class="pl-s1">freq</span>.<span class="pl-c1">min</span>() <span 
class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>, <span class="pl-en">int</span>(<span class="pl-s1">freq</span>.<span class="pl-c1">max</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>], <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">'documents containing word that are positive (norm-cdf)'</span>, <span class="pl-s1">original_y</span><span class="pl-c1">=</span><span class="pl-s1">prec</span>, <span class="pl-s1">y_coords</span><span class="pl-c1">=</span>(<span class="pl-s1">prec</span> <span class="pl-c1">-</span> <span class="pl-s1">prec</span>.<span class="pl-c1">min</span>()) <span class="pl-c1">/</span> <span class="pl-s1">prec</span>.<span class="pl-c1">max</span>(), <span class="pl-s1">y_axis_values</span><span class="pl-c1">=</span>[<span class="pl-en">int</span>(<span class="pl-s1">prec</span>.<span class="pl-c1">min</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>, <span class="pl-en">int</span>((<span class="pl-s1">prec</span>.<span class="pl-c1">max</span>() <span class="pl-c1">/</span> <span class="pl-c1">2.</span>) <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>, <span class="pl-en">int</span>(<span class="pl-s1">prec</span>.<span class="pl-c1">max</span>() <span class="pl-c1">*</span> <span class="pl-c1">1000</span>) <span class="pl-c1">/</span> <span class="pl-c1">1000.</span>], <span class="pl-s1">scores</span><span class="pl-c1">=</span><span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_scaled_f_score</span>.<span class="pl-c1">values</span>, <span class="pl-s1">sort_by_dist</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">show_characteristic</span><span 
class="pl-c1">=</span><span class="pl-c1">False</span> )</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs6.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs6.png" alt="SFS6" style="max-width: 100%;"></a></p> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs7.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs7.png" alt="SFS7" style="max-width: 100%;"></a></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="term_freq_df['neg_precision_normcdf'] = normcdf((term_freq_df['Negative freq'] * 1. / (term_freq_df['Negative freq'] + term_freq_df['Positive freq']))) term_freq_df['neg_freq_pct_normcdf'] = normcdf((term_freq_df['Negative freq'] * 1. 
/ term_freq_df['Negative freq'].sum())) term_freq_df['neg_scaled_f_score'] = hmean( [term_freq_df['neg_precision_normcdf'], term_freq_df['neg_freq_pct_normcdf']]) term_freq_df['scaled_f_score'] = 0 term_freq_df.loc[term_freq_df['pos_scaled_f_score'] &gt; term_freq_df['neg_scaled_f_score'], 'scaled_f_score'] = term_freq_df['pos_scaled_f_score'] term_freq_df.loc[term_freq_df['pos_scaled_f_score'] &lt; term_freq_df['neg_scaled_f_score'], 'scaled_f_score'] = 1 - term_freq_df['neg_scaled_f_score'] term_freq_df['scaled_f_score'] = 2 * (term_freq_df['scaled_f_score'] - 0.5) term_freq_df.sort_values(by='scaled_f_score', ascending=True).iloc[:10]"><pre><span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_precision_normcdf'</span>] <span class="pl-c1">=</span> <span class="pl-en">normcdf</span>((<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Negative freq'</span>] <span class="pl-c1">*</span> <span class="pl-c1">1.</span> <span class="pl-c1">/</span> (<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Negative freq'</span>] <span class="pl-c1">+</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Positive freq'</span>]))) <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_freq_pct_normcdf'</span>] <span class="pl-c1">=</span> <span class="pl-en">normcdf</span>((<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Negative freq'</span>] <span class="pl-c1">*</span> <span class="pl-c1">1.</span> <span class="pl-c1">/</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'Negative freq'</span>].<span class="pl-c1">sum</span>())) <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_scaled_f_score'</span>] <span class="pl-c1">=</span> <span class="pl-en">hmean</span>( [<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_precision_normcdf'</span>], <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_freq_pct_normcdf'</span>]]) <span 
class="pl-s1">term_freq_df</span>[<span class="pl-s">'scaled_f_score'</span>] <span class="pl-c1">=</span> <span class="pl-c1">0</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">loc</span>[<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_scaled_f_score'</span>] <span class="pl-c1">&gt;</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_scaled_f_score'</span>], <span class="pl-s">'scaled_f_score'</span>] <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_scaled_f_score'</span>] <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">loc</span>[<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'pos_scaled_f_score'</span>] <span class="pl-c1">&lt;</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_scaled_f_score'</span>], <span class="pl-s">'scaled_f_score'</span>] <span class="pl-c1">=</span> <span class="pl-c1">1</span> <span class="pl-c1">-</span> <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'neg_scaled_f_score'</span>] <span class="pl-s1">term_freq_df</span>[<span class="pl-s">'scaled_f_score'</span>] <span class="pl-c1">=</span> <span class="pl-c1">2</span> <span class="pl-c1">*</span> (<span class="pl-s1">term_freq_df</span>[<span class="pl-s">'scaled_f_score'</span>] <span class="pl-c1">-</span> <span class="pl-c1">0.5</span>) <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">sort_values</span>(<span class="pl-s1">by</span><span class="pl-c1">=</span><span class="pl-s">'scaled_f_score'</span>, <span class="pl-s1">ascending</span><span class="pl-c1">=</span><span class="pl-c1">True</span>).<span class="pl-c1">iloc</span>[:<span class="pl-c1">10</span>]</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs8.png"><img 
src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs8.png" alt="SFS8" style="max-width: 100%;"></a></p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="is_pos = term_freq_df.pos_scaled_f_score &gt; term_freq_df.neg_scaled_f_score freq = term_freq_df.pos_freq_pct_normcdf * is_pos - term_freq_df.neg_freq_pct_normcdf * ~is_pos prec = term_freq_df.pos_precision_normcdf * is_pos - term_freq_df.neg_precision_normcdf * ~is_pos def scale(ar): return (ar - ar.min()) / (ar.max() - ar.min()) def close_gap(ar): ar[ar &gt; 0] -= ar[ar &gt; 0].min() ar[ar &lt; 0] -= ar[ar &lt; 0].max() return ar html = st.produce_scattertext_explorer( corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)), category='Positive', not_category_name='Negative', not_categories=['Negative'], x_label='Frequency', original_x=freq, x_coords=scale(close_gap(freq)), x_axis_labels=['Frequent in Neg', 'Not Frequent', 'Frequent in Pos'], y_label='Precision', original_y=prec, y_coords=scale(close_gap(prec)), y_axis_labels=['Neg Precise', 'Imprecise', 'Pos Precise'], scores=(term_freq_df.scaled_f_score.values + 1) / 2, sort_by_dist=False, show_characteristic=False )"><pre><span class="pl-s1">is_pos</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_scaled_f_score</span> <span class="pl-c1">&gt;</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">neg_scaled_f_score</span> <span class="pl-s1">freq</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_freq_pct_normcdf</span> <span class="pl-c1">*</span> <span class="pl-s1">is_pos</span> <span class="pl-c1">-</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">neg_freq_pct_normcdf</span> <span class="pl-c1">*</span> <span class="pl-c1">~</span><span class="pl-s1">is_pos</span> <span 
class="pl-s1">prec</span> <span class="pl-c1">=</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">pos_precision_normcdf</span> <span class="pl-c1">*</span> <span class="pl-s1">is_pos</span> <span class="pl-c1">-</span> <span class="pl-s1">term_freq_df</span>.<span class="pl-c1">neg_precision_normcdf</span> <span class="pl-c1">*</span> <span class="pl-c1">~</span><span class="pl-s1">is_pos</span> <span class="pl-k">def</span> <span class="pl-en">scale</span>(<span class="pl-s1">ar</span>): <span class="pl-k">return</span> (<span class="pl-s1">ar</span> <span class="pl-c1">-</span> <span class="pl-s1">ar</span>.<span class="pl-c1">min</span>()) <span class="pl-c1">/</span> (<span class="pl-s1">ar</span>.<span class="pl-c1">max</span>() <span class="pl-c1">-</span> <span class="pl-s1">ar</span>.<span class="pl-c1">min</span>()) <span class="pl-k">def</span> <span class="pl-en">close_gap</span>(<span class="pl-s1">ar</span>): <span class="pl-s1">ar</span>[<span class="pl-s1">ar</span> <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span>] <span class="pl-c1">-=</span> <span class="pl-s1">ar</span>[<span class="pl-s1">ar</span> <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span>].<span class="pl-c1">min</span>() <span class="pl-s1">ar</span>[<span class="pl-s1">ar</span> <span class="pl-c1">&lt;</span> <span class="pl-c1">0</span>] <span class="pl-c1">-=</span> <span class="pl-s1">ar</span>[<span class="pl-s1">ar</span> <span class="pl-c1">&lt;</span> <span class="pl-c1">0</span>].<span class="pl-c1">max</span>() <span class="pl-k">return</span> <span class="pl-s1">ar</span> <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>.<span class="pl-c1">remove_terms</span>(<span class="pl-en">set</span>(<span class="pl-s1">corpus</span>.<span class="pl-c1">get_terms</span>()) <span class="pl-c1">-</span> <span 
class="pl-en">set</span>(<span class="pl-s1">term_freq_df</span>.<span class="pl-c1">index</span>)), <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'Positive'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Negative'</span>, <span class="pl-s1">not_categories</span><span class="pl-c1">=</span>[<span class="pl-s">'Negative'</span>], <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Frequency'</span>, <span class="pl-s1">original_x</span><span class="pl-c1">=</span><span class="pl-s1">freq</span>, <span class="pl-s1">x_coords</span><span class="pl-c1">=</span><span class="pl-en">scale</span>(<span class="pl-en">close_gap</span>(<span class="pl-s1">freq</span>)), <span class="pl-s1">x_axis_labels</span><span class="pl-c1">=</span>[<span class="pl-s">'Frequent in Neg'</span>, <span class="pl-s">'Not Frequent'</span>, <span class="pl-s">'Frequent in Pos'</span>], <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">'Precision'</span>, <span class="pl-s1">original_y</span><span class="pl-c1">=</span><span class="pl-s1">prec</span>, <span class="pl-s1">y_coords</span><span class="pl-c1">=</span><span class="pl-en">scale</span>(<span class="pl-en">close_gap</span>(<span class="pl-s1">prec</span>)), <span class="pl-s1">y_axis_labels</span><span class="pl-c1">=</span>[<span class="pl-s">'Neg Precise'</span>, <span class="pl-s">'Imprecise'</span>, <span class="pl-s">'Pos Precise'</span>], <span class="pl-s1">scores</span><span class="pl-c1">=</span>(<span class="pl-s1">term_freq_df</span>.<span class="pl-c1">scaled_f_score</span>.<span class="pl-c1">values</span> <span class="pl-c1">+</span> <span class="pl-c1">1</span>) <span class="pl-c1">/</span> <span class="pl-c1">2</span>, <span class="pl-s1">sort_by_dist</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">show_characteristic</span><span 
class="pl-c1">=</span><span class="pl-c1">False</span> )</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs9.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs9.png" alt="SFS9" style="max-width: 100%;"></a></p> <p dir="auto">We can use <code>st.ScaledFScorePresets</code> as a term scorer to display terms' Scaled F-Score on the y-axis and term frequencies on the x-axis.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_frequency_explorer( corpus.remove_terms(set(corpus.get_terms()) - set(term_freq_df.index)), category='Positive', not_category_name='Negative', not_categories=['Negative'], term_scorer=st.ScaledFScorePresets(beta=1, one_to_neg_one=True), metadata=rdf['movie_name'], grey_threshold=0 )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_frequency_explorer</span>( <span class="pl-s1">corpus</span>.<span class="pl-c1">remove_terms</span>(<span class="pl-en">set</span>(<span class="pl-s1">corpus</span>.<span class="pl-c1">get_terms</span>()) <span class="pl-c1">-</span> <span class="pl-en">set</span>(<span class="pl-s1">term_freq_df</span>.<span class="pl-c1">index</span>)), <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'Positive'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Negative'</span>, <span class="pl-s1">not_categories</span><span class="pl-c1">=</span>[<span class="pl-s">'Negative'</span>], <span class="pl-s1">term_scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">ScaledFScorePresets</span>(<span class="pl-s1">beta</span><span 
class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">one_to_neg_one</span><span class="pl-c1">=</span><span class="pl-c1">True</span>), <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">rdf</span>[<span class="pl-s">'movie_name'</span>], <span class="pl-s1">grey_threshold</span><span class="pl-c1">=</span><span class="pl-c1">0</span> )</pre></div> <p dir="auto"><a target="_blank" rel="noopener noreferrer nofollow" href="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs10.png"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/scaledfscoreimgs/sfs10.png" alt="SFS10" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Alternative term scoring methods</h3><a id="user-content-alternative-term-scoring-methods" class="anchor" aria-label="Permalink: Alternative term scoring methods" href="#alternative-term-scoring-methods"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Scaled F-Score is not the only scoring method included in Scattertext. 
Please click on one of the links below to view a notebook which describes how other class association scores work and can be visualized through Scattertext.</p> <ul dir="auto"> <li><a href="https://colab.research.google.com/drive/1snxAP8X6EIDi42FugJ_h5U-fBGDCqtyS" rel="nofollow">Google Colab Notebook</a> (recommended).</li> <li><a href="https://colab.research.google.com/drive/1snxAP8X6EIDi42FugJ_h5U-fBGDCqtyS" rel="nofollow">Jupyter Notebook via NBViewer</a>.</li> </ul> <p dir="auto">New in 0.0.2.73 is the delta JS-Divergence scorer <code>DeltaJSDivergence</code> (Gallagher et al. 2020), and its corresponding compactor (JSDCompactor). See <code>demo_deltajsd.py</code> for an example usage.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">The position-select-plot process</h3><a id="user-content-the-position-select-plot-process" class="anchor" aria-label="Permalink: The position-select-plot process" href="#the-position-select-plot-process"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">New in 0.0.2.72</p> <p dir="auto">Scattertext was originally set up to visualize corpora objects, which are connected sets of documents and terms to visualize. The "compaction" process allows users to eliminate terms which may not be associated with a category using a variety of feature selection methods.
The issue with this is that the terms eliminated during the selection process are not taken into account when scaling term positions.</p> <p dir="auto">This issue can be mitigated by using the position-select-plot process, where term positions are pre-determined before the selection process is made.</p> <p dir="auto">Let's first use the 2012 conventions corpus, update the category names, and create a unigram corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import scattertext as st import numpy as np df = st.SampleCorpora.ConventionData2012.get_data().assign( parse=lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ).assign(party=lambda df: df['party'].apply({'democrat': 'Democratic', 'republican': 'Republican'}.get)) corpus = st.CorpusFromParsedDocuments( df, category_col='party', parsed_col='parse' ).build().get_unigram_corpus() category_name = 'Democratic' not_category_name = 'Republican'"><pre><span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-k">import</span> <span class="pl-s1">numpy</span> <span class="pl-k">as</span> <span class="pl-s1">np</span> <span class="pl-s1">df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>().<span class="pl-c1">assign</span>( <span class="pl-s1">parse</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span class="pl-s1">df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) ).<span class="pl-c1">assign</span>(<span class="pl-s1">party</span><span class="pl-c1">=</span><span class="pl-k">lambda</span> <span class="pl-s1">df</span>: <span 
class="pl-s1">df</span>[<span class="pl-s">'party'</span>].<span class="pl-c1">apply</span>({<span class="pl-s">'democrat'</span>: <span class="pl-s">'Democratic'</span>, <span class="pl-s">'republican'</span>: <span class="pl-s">'Republican'</span>}.<span class="pl-c1">get</span>)) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>( <span class="pl-s1">df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span> ).<span class="pl-c1">build</span>().<span class="pl-c1">get_unigram_corpus</span>() <span class="pl-s1">category_name</span> <span class="pl-c1">=</span> <span class="pl-s">'Democratic'</span> <span class="pl-s1">not_category_name</span> <span class="pl-c1">=</span> <span class="pl-s">'Republican'</span></pre></div> <p dir="auto">Next, let's create a dataframe consisting of the original counts and their log-scale positions.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="def get_log_scale_df(corpus, y_category, x_category): term_coord_df = corpus.get_term_freq_df('') # Log scale term counts (with a smoothing constant) as the initial coordinates coord_columns = [] for category in [y_category, x_category]: col_name = category + '_coord' term_coord_df[col_name] = np.log(term_coord_df[category] + 1e-6) / np.log(2) coord_columns.append(col_name) # Scale these coordinates to between 0 and 1 min_offset = term_coord_df[coord_columns].min(axis=0).min() for coord_column in coord_columns: term_coord_df[coord_column] -= min_offset max_offset = term_coord_df[coord_columns].max(axis=0).max() for coord_column in coord_columns: term_coord_df[coord_column] /= max_offset return term_coord_df # Get term coordinates from original corpus 
term_coordinates = get_log_scale_df(corpus, category_name, not_category_name) print(term_coordinates)"><pre><span class="pl-k">def</span> <span class="pl-en">get_log_scale_df</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">y_category</span>, <span class="pl-s1">x_category</span>): <span class="pl-s1">term_coord_df</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">get_term_freq_df</span>(<span class="pl-s">''</span>) <span class="pl-c"># Log scale term counts (with a smoothing constant) as the initial coordinates</span> <span class="pl-s1">coord_columns</span> <span class="pl-c1">=</span> [] <span class="pl-k">for</span> <span class="pl-s1">category</span> <span class="pl-c1">in</span> [<span class="pl-s1">y_category</span>, <span class="pl-s1">x_category</span>]: <span class="pl-s1">col_name</span> <span class="pl-c1">=</span> <span class="pl-s1">category</span> <span class="pl-c1">+</span> <span class="pl-s">'_coord'</span> <span class="pl-s1">term_coord_df</span>[<span class="pl-s1">col_name</span>] <span class="pl-c1">=</span> <span class="pl-s1">np</span>.<span class="pl-c1">log</span>(<span class="pl-s1">term_coord_df</span>[<span class="pl-s1">category</span>] <span class="pl-c1">+</span> <span class="pl-c1">1e-6</span>) <span class="pl-c1">/</span> <span class="pl-s1">np</span>.<span class="pl-c1">log</span>(<span class="pl-c1">2</span>) <span class="pl-s1">coord_columns</span>.<span class="pl-c1">append</span>(<span class="pl-s1">col_name</span>) <span class="pl-c"># Scale these coordinates to between 0 and 1</span> <span class="pl-s1">min_offset</span> <span class="pl-c1">=</span> <span class="pl-s1">term_coord_df</span>[<span class="pl-s1">coord_columns</span>].<span class="pl-c1">min</span>(<span class="pl-s1">axis</span><span class="pl-c1">=</span><span class="pl-c1">0</span>).<span class="pl-c1">min</span>() <span class="pl-k">for</span> <span class="pl-s1">coord_column</span> <span 
class="pl-c1">in</span> <span class="pl-s1">coord_columns</span>: <span class="pl-s1">term_coord_df</span>[<span class="pl-s1">coord_column</span>] <span class="pl-c1">-=</span> <span class="pl-s1">min_offset</span> <span class="pl-s1">max_offset</span> <span class="pl-c1">=</span> <span class="pl-s1">term_coord_df</span>[<span class="pl-s1">coord_columns</span>].<span class="pl-c1">max</span>(<span class="pl-s1">axis</span><span class="pl-c1">=</span><span class="pl-c1">0</span>).<span class="pl-c1">max</span>() <span class="pl-k">for</span> <span class="pl-s1">coord_column</span> <span class="pl-c1">in</span> <span class="pl-s1">coord_columns</span>: <span class="pl-s1">term_coord_df</span>[<span class="pl-s1">coord_column</span>] <span class="pl-c1">/=</span> <span class="pl-s1">max_offset</span> <span class="pl-k">return</span> <span class="pl-s1">term_coord_df</span> <span class="pl-c"># Get term coordinates from original corpus</span> <span class="pl-s1">term_coordinates</span> <span class="pl-c1">=</span> <span class="pl-en">get_log_scale_df</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">category_name</span>, <span class="pl-s1">not_category_name</span>) <span class="pl-en">print</span>(<span class="pl-s1">term_coordinates</span>)</pre></div> <p dir="auto">Here is a preview of the <code>term_coordinates</code> dataframe. The <code>Democratic</code> and <code>Republican</code> columns contain the term counts, while the <code>_coord</code> columns contain their logged coordinates. Visualizing 7,973 terms is difficult (but possible) for people running Scattertext on most computers.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content=" Democratic Republican Democratic_coord Republican_coord term thank 158 205 0.860166 0.872032 you 836 794 0.936078 0.933729 so 337 212 0.894681 0.873562 much 84 76 0.831380 0.826820 very 62 75 0.817543 0.826216 ... ... ... ... ...
precinct 0 2 0.000000 0.661076 godspeed 0 1 0.000000 0.629493 beauty 0 1 0.000000 0.629493 bumper 0 1 0.000000 0.629493 sticker 0 1 0.000000 0.629493 [7973 rows x 4 columns]"><pre class="notranslate"><code> Democratic Republican Democratic_coord Republican_coord term thank 158 205 0.860166 0.872032 you 836 794 0.936078 0.933729 so 337 212 0.894681 0.873562 much 84 76 0.831380 0.826820 very 62 75 0.817543 0.826216 ... ... ... ... ... precinct 0 2 0.000000 0.661076 godspeed 0 1 0.000000 0.629493 beauty 0 1 0.000000 0.629493 bumper 0 1 0.000000 0.629493 sticker 0 1 0.000000 0.629493 [7973 rows x 4 columns] </code></pre></div> <p dir="auto">We can visualize this full data set by running the following code block. We'll create a custom Javascript function to populate the tooltip with the original term counts, and create a Scattertext Explorer where the x and y coordinates and original values are specified from the data frame. Additionally, we can use <code>show_diagonal=True</code> to draw a dashed diagonal line across the plot area.</p> <p dir="auto">You can click the chart below to see the interactive version. Note that it will take a while to load.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="# The tooltip JS function. Note that d is is the term data object, and ox and oy are the original x- and y- # axis counts. 
get_tooltip_content = ('(function(d) {return d.term + &quot;&lt;br/&gt;' + not_category_name + ' Count: &quot; ' + '+ d.ox +&quot;&lt;br/&gt;' + category_name + ' Count: &quot; + d.oy})') html_orig = st.produce_scattertext_explorer( corpus, category=category_name, not_category_name=not_category_name, minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, metadata=corpus.get_df()['speaker'], show_diagonal=True, original_y=term_coordinates[category_name], original_x=term_coordinates[not_category_name], x_coords=term_coordinates[category_name + '_coord'], y_coords=term_coordinates[not_category_name + '_coord'], max_overlapping=3, use_global_scale=True, get_tooltip_content=get_tooltip_content, )"><pre class="notranslate"><code># The tooltip JS function. Note that d is is the term data object, and ox and oy are the original x- and y- # axis counts. get_tooltip_content = ('(function(d) {return d.term + "&lt;br/&gt;' + not_category_name + ' Count: " ' + '+ d.ox +"&lt;br/&gt;' + category_name + ' Count: " + d.oy})') html_orig = st.produce_scattertext_explorer( corpus, category=category_name, not_category_name=not_category_name, minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, metadata=corpus.get_df()['speaker'], show_diagonal=True, original_y=term_coordinates[category_name], original_x=term_coordinates[not_category_name], x_coords=term_coordinates[category_name + '_coord'], y_coords=term_coordinates[not_category_name + '_coord'], max_overlapping=3, use_global_scale=True, get_tooltip_content=get_tooltip_content, ) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_global_scale_log_orig.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_global_scale_log_orig.png" alt="demo_global_scale_log_orig.png" style="max-width: 100%;"></a></p> <p dir="auto">Next, we can visualize the compacted version of the corpus. 
The compaction, using <code>ClassPercentageCompactor</code>, selects terms which occur frequently in each category. The <code>term_count</code> parameter, set to 2, is used to determine the percentage threshold for terms to keep in a particular category. This is done by calculating the percentile of terms (types) in each category which appear more than two times. We find the smallest percentile, and only include terms which occur above that percentile in a given category.</p> <p dir="auto">Note that this compaction leaves only 2,828 terms. This number is much easier for Scattertext to display in a browser.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="# Select terms which appear a minimum threshold in both corpora compact_corpus = corpus.compact(st.ClassPercentageCompactor(term_count=2)) # Only take term coordinates of terms remaining in corpus term_coordinates = term_coordinates.loc[compact_corpus.get_terms()] html_compact = st.produce_scattertext_explorer( compact_corpus, category=category_name, not_category_name=not_category_name, minimum_term_frequency=0, pmi_threshold_coefficient=0, width_in_pixels=1000, metadata=corpus.get_df()['speaker'], show_diagonal=True, original_y=term_coordinates[category_name], original_x=term_coordinates[not_category_name], x_coords=term_coordinates[category_name + '_coord'], y_coords=term_coordinates[not_category_name + '_coord'], max_overlapping=3, use_global_scale=True, get_tooltip_content=get_tooltip_content, )"><pre><span class="pl-c"># Select terms which appear a minimum threshold in both corpora</span> <span class="pl-s1">compact_corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">corpus</span>.<span class="pl-c1">compact</span>(<span class="pl-s1">st</span>.<span class="pl-c1">ClassPercentageCompactor</span>(<span class="pl-s1">term_count</span><span class="pl-c1">=</span><span class="pl-c1">2</span>)) <span
class="pl-c"># Only take term coordinates of terms remaining in corpus</span> <span class="pl-s1">term_coordinates</span> <span class="pl-c1">=</span> <span class="pl-s1">term_coordinates</span>.<span class="pl-c1">loc</span>[<span class="pl-s1">compact_corpus</span>.<span class="pl-c1">get_terms</span>()] <span class="pl-s1">html_compact</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">compact_corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s1">category_name</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s1">not_category_name</span>, <span class="pl-s1">minimum_term_frequency</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">pmi_threshold_coefficient</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">corpus</span>.<span class="pl-c1">get_df</span>()[<span class="pl-s">'speaker'</span>], <span class="pl-s1">show_diagonal</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">original_y</span><span class="pl-c1">=</span><span class="pl-s1">term_coordinates</span>[<span class="pl-s1">category_name</span>], <span class="pl-s1">original_x</span><span class="pl-c1">=</span><span class="pl-s1">term_coordinates</span>[<span class="pl-s1">not_category_name</span>], <span class="pl-s1">x_coords</span><span class="pl-c1">=</span><span class="pl-s1">term_coordinates</span>[<span class="pl-s1">category_name</span> <span class="pl-c1">+</span> <span class="pl-s">'_coord'</span>], <span class="pl-s1">y_coords</span><span class="pl-c1">=</span><span class="pl-s1">term_coordinates</span>[<span class="pl-s1">not_category_name</span> 
<span class="pl-c1">+</span> <span class="pl-s">'_coord'</span>], <span class="pl-s1">max_overlapping</span><span class="pl-c1">=</span><span class="pl-c1">3</span>, <span class="pl-s1">use_global_scale</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">get_tooltip_content</span><span class="pl-c1">=</span><span class="pl-s1">get_tooltip_content</span>, )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_global_scale_log.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_global_scale_log.png" alt="demo_global_scale_log.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Advanced uses</h2><a id="user-content-advanced-uses" class="anchor" aria-label="Permalink: Advanced uses" href="#advanced-uses"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing differences based on only term frequencies</h3><a id="user-content-visualizing-differences-based-on-only-term-frequencies" class="anchor" aria-label="Permalink: Visualizing differences based on only term frequencies" href="#visualizing-differences-based-on-only-term-frequencies"><svg class="octicon octicon-link" 
viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Occasionally, only term frequency statistics are available. This may happen in the case of very large, lost, or proprietary data sets. <code>TermCategoryFrequencies</code> is a corpus representation that can accept this sort of data, along with any categorized documents that happen to be available.</p> <p dir="auto">Let's use the <a href="https://corpus.byu.edu/coca/" rel="nofollow">Corpus of Contemporary American English</a> as an example.<br> We'll construct a visualization to analyze the difference between spoken American English and English that occurs in fiction.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="df = (pd.read_excel('https://www.wordfrequency.info/files/genres_sample.xls') .dropna() .set_index('lemma')[['SPOKEN', 'FICTION']] .iloc[:1000]) df.head() ''' SPOKEN FICTION lemma the 3859682.0 4092394.0 I 1346545.0 1382716.0 they 609735.0 352405.0 she 212920.0 798208.0 would 233766.0 229865.0 '''"><pre><span class="pl-s1">df</span> <span class="pl-c1">=</span> (<span class="pl-s1">pd</span>.<span class="pl-c1">read_excel</span>(<span class="pl-s">'https://www.wordfrequency.info/files/genres_sample.xls'</span>) .<span class="pl-c1">dropna</span>() .<span class="pl-c1">set_index</span>(<span class="pl-s">'lemma'</span>)[[<span
class="pl-s">'SPOKEN'</span>, <span class="pl-s">'FICTION'</span>]] .<span class="pl-c1">iloc</span>[:<span class="pl-c1">1000</span>]) <span class="pl-s1">df</span>.<span class="pl-c1">head</span>() <span class="pl-s">'''</span> <span class="pl-s"> SPOKEN FICTION</span> <span class="pl-s">lemma</span> <span class="pl-s">the 3859682.0 4092394.0</span> <span class="pl-s">I 1346545.0 1382716.0</span> <span class="pl-s">they 609735.0 352405.0</span> <span class="pl-s">she 212920.0 798208.0</span> <span class="pl-s">would 233766.0 229865.0</span> <span class="pl-s">'''</span></pre></div> <p dir="auto">Transforming this into a visualization is extremely easy. Just pass a dataframe indexed on terms with columns indicating category-counts into the <code>TermCategoryFrequencies</code> constructor.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="term_cat_freq = st.TermCategoryFrequencies(df)"><pre><span class="pl-s1">term_cat_freq</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">TermCategoryFrequencies</span>(<span class="pl-s1">df</span>)</pre></div> <p dir="auto">And call <code>produce_scattertext_explorer</code> normally:</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_scattertext_explorer( term_cat_freq, category='SPOKEN', category_name='Spoken', not_category_name='Fiction', )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">term_cat_freq</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'SPOKEN'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Spoken'</span>, <span class="pl-s1">not_category_name</span><span 
class="pl-c1">=</span><span class="pl-s">'Fiction'</span>, )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_category_frequencies.html" rel="nofollow"><img src="https://camo.githubusercontent.com/1d763c1a79f26c2a707f3bd822d241a7a29e7a69f998a2d6fe5e69543b098ac4/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f63617465676f72795f6672657175656e636965732e706e67" alt="demo_category_frequencies.html" data-canonical-src="https://jasonkessler.github.io/demo_category_frequencies.png" style="max-width: 100%;"></a></p> <p dir="auto">If you'd like to incorporate some documents into the visualization, you can add them to the <code>TermCategoryFrequencies</code> object.</p> <p dir="auto">First, let's extract some example Fiction and Spoken documents from the sample COCA corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import requests, zipfile, io coca_sample_url = 'http://corpus.byu.edu/cocatext/samples/text.zip' zip_file = zipfile.ZipFile(io.BytesIO(requests.get(coca_sample_url).content)) document_df = pd.DataFrame( [{'text': zip_file.open(fn).read().decode('utf-8'), 'category': 'SPOKEN'} for fn in zip_file.filelist if fn.filename.startswith('w_spok')][:2] + [{'text': zip_file.open(fn).read().decode('utf-8'), 'category': 'FICTION'} for fn in zip_file.filelist if fn.filename.startswith('w_fic')][:2])"><pre><span class="pl-k">import</span> <span class="pl-s1">requests</span>, <span class="pl-s1">zipfile</span>, <span class="pl-s1">io</span> <span class="pl-s1">coca_sample_url</span> <span class="pl-c1">=</span> <span class="pl-s">'http://corpus.byu.edu/cocatext/samples/text.zip'</span> <span class="pl-s1">zip_file</span> <span class="pl-c1">=</span> <span class="pl-s1">zipfile</span>.<span class="pl-c1">ZipFile</span>(<span class="pl-s1">io</span>.<span class="pl-c1">BytesIO</span>(<span class="pl-s1">requests</span>.<span 
class="pl-c1">get</span>(<span class="pl-s1">coca_sample_url</span>).<span class="pl-c1">content</span>)) <span class="pl-s1">document_df</span> <span class="pl-c1">=</span> <span class="pl-s1">pd</span>.<span class="pl-c1">DataFrame</span>( [{<span class="pl-s">'text'</span>: <span class="pl-s1">zip_file</span>.<span class="pl-c1">open</span>(<span class="pl-s1">fn</span>).<span class="pl-c1">read</span>().<span class="pl-c1">decode</span>(<span class="pl-s">'utf-8'</span>), <span class="pl-s">'category'</span>: <span class="pl-s">'SPOKEN'</span>} <span class="pl-k">for</span> <span class="pl-s1">fn</span> <span class="pl-c1">in</span> <span class="pl-s1">zip_file</span>.<span class="pl-c1">filelist</span> <span class="pl-k">if</span> <span class="pl-s1">fn</span>.<span class="pl-c1">filename</span>.<span class="pl-c1">startswith</span>(<span class="pl-s">'w_spok'</span>)][:<span class="pl-c1">2</span>] <span class="pl-c1">+</span> [{<span class="pl-s">'text'</span>: <span class="pl-s1">zip_file</span>.<span class="pl-c1">open</span>(<span class="pl-s1">fn</span>).<span class="pl-c1">read</span>().<span class="pl-c1">decode</span>(<span class="pl-s">'utf-8'</span>), <span class="pl-s">'category'</span>: <span class="pl-s">'FICTION'</span>} <span class="pl-k">for</span> <span class="pl-s1">fn</span> <span class="pl-c1">in</span> <span class="pl-s1">zip_file</span>.<span class="pl-c1">filelist</span> <span class="pl-k">if</span> <span class="pl-s1">fn</span>.<span class="pl-c1">filename</span>.<span class="pl-c1">startswith</span>(<span class="pl-s">'w_fic'</span>)][:<span class="pl-c1">2</span>])</pre></div> <p dir="auto">And we'll pass the <code>document_df</code> dataframe into <code>TermCategoryFrequencies</code> via the <code>document_category_df</code> parameter. Ensure the dataframe has two columns, 'text' and 'category'. 
Afterward, we can call <code>produce_scattertext_explorer</code> (or your visualization function of choice) normally.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="doc_term_cat_freq = st.TermCategoryFrequencies(df, document_category_df=document_df) html = st.produce_scattertext_explorer( doc_term_cat_freq, category='SPOKEN', category_name='Spoken', not_category_name='Fiction', )"><pre><span class="pl-s1">doc_term_cat_freq</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">TermCategoryFrequencies</span>(<span class="pl-s1">df</span>, <span class="pl-s1">document_category_df</span><span class="pl-c1">=</span><span class="pl-s1">document_df</span>) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">doc_term_cat_freq</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'SPOKEN'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Spoken'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Fiction'</span>, )</pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing query-based categorical differences</h3><a id="user-content-visualizing-query-based-categorical-differences" class="anchor" aria-label="Permalink: Visualizing query-based categorical differences" href="#visualizing-query-based-categorical-differences"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 
1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Word representations have recently become a hot topic in NLP. While lots of work has been done visualizing how terms relate to one another given their scores (e.g., <a href="http://projector.tensorflow.org/" rel="nofollow">http://projector.tensorflow.org/</a>), none to my knowledge has been done visualizing how we can use these to examine how document categories differ.</p> <p dir="auto">In this example given a query term, "jobs", we can see how Republicans and Democrats talk about it differently.</p> <p dir="auto">In this configuration of Scattertext, words are colored by their similarity to a query phrase.<br> This is done using <a href="https://spacy.io/" rel="nofollow">spaCy</a>-provided GloVe word vectors (trained on the Common Crawl corpus). The cosine distance between vectors is used, with mean vectors used for phrases.</p> <p dir="auto">The calculation of the most similar terms associated with each category is a simple heuristic. First, sets of terms closely associated with a category are found. Second, these terms are ranked based on their similarity to the query, and the top rank terms are displayed to the right of the scatterplot.</p> <p dir="auto">A term is considered associated if its p-value is less than 0.05. P-values are determined using Monroe et al. (2008)'s difference in the weighted log-odds-ratios with an uninformative Dirichlet prior. This is the only model-based method discussed in Monroe et al. that does not rely on a large, in-domain background corpus. 
Since we are scoring bigrams in addition to the unigrams scored by Monroe, the size of the corpus would have to be larger to have high enough bigram counts for proper penalization. This function relies on the Dirichlet distribution's parameter alpha, a vector, which is uniformly set to 0.01.</p> <p dir="auto">Here is the code to produce such a visualization.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; from scattertext import word_similarity_explorer &gt;&gt;&gt; html = word_similarity_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... target_term='jobs', ... minimum_term_frequency=5, ... pmi_threshold_coefficient=4, ... width_in_pixels=1000, ... metadata=convention_df['speaker'], ... alpha=0.01, ... max_p_val=0.05, ... save_svg_button=True) &gt;&gt;&gt; open(&quot;Convention-Visualization-Jobs.html&quot;, 'wb').write(html.encode('utf-8'))"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; from scattertext import word_similarity_explorer &gt;&gt;&gt; html = word_similarity_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... target_term='jobs', ... minimum_term_frequency=5, ... pmi_threshold_coefficient=4, ... width_in_pixels=1000, ... metadata=convention_df['speaker'], ... alpha=0.01, ... max_p_val=0.05, ... 
save_svg_button=True) &gt;&gt;&gt; open("Convention-Visualization-Jobs.html", 'wb').write(html.encode('utf-8')) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/Convention-Visualization-Jobs.html" rel="nofollow"><img src="https://camo.githubusercontent.com/bbe72fe07172e1c0edeb0ef26a30147847d86f7158d0564a4daf7ac61fbc8741/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e2d56697375616c697a6174696f6e2d4a6f62732e706e67" alt="Convention-Visualization-Jobs.html" data-canonical-src="https://jasonkessler.github.io/Convention-Visualization-Jobs.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h4 tabindex="-1" class="heading-element" dir="auto">Developing and using bespoke word representations</h4><a id="user-content-developing-and-using-bespoke-word-representations" class="anchor" aria-label="Permalink: Developing and using bespoke word representations" href="#developing-and-using-bespoke-word-representations"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Scattertext can interface with Gensim Word2Vec models. For example, here's a snippet from <code>demo_gensim_similarity.py</code> which illustrates how to train and use a word2vec model on a corpus. 
Note the similarities produced reflect quirks of the corpus, e.g., "8" tends to refer to the 8% unemployment rate at the time of the convention.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import spacy from gensim.models import word2vec from scattertext import SampleCorpora, word_similarity_explorer_gensim, Word2VecFromParsedCorpus from scattertext.CorpusFromParsedDocuments import CorpusFromParsedDocuments nlp = spacy.en.English() convention_df = SampleCorpora.ConventionData2012.get_data() convention_df['parsed'] = convention_df.text.apply(nlp) corpus = CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parsed').build() model = word2vec.Word2Vec(size=300, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=0, seed=1, workers=1, min_alpha=0.0001, sg=1, hs=1, negative=0, cbow_mean=0, iter=1, null_word=0, trim_rule=None, sorted_vocab=1) html = word_similarity_explorer_gensim(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', target_term='jobs', minimum_term_frequency=5, pmi_threshold_coefficient=4, width_in_pixels=1000, metadata=convention_df['speaker'], word2vec=Word2VecFromParsedCorpus(corpus, model).train(), max_p_val=0.05, save_svg_button=True) open('./demo_gensim_similarity.html', 'wb').write(html.encode('utf-8'))"><pre><span class="pl-k">import</span> <span class="pl-s1">spacy</span> <span class="pl-k">from</span> <span class="pl-s1">gensim</span>.<span class="pl-s1">models</span> <span class="pl-k">import</span> <span class="pl-s1">word2vec</span> <span class="pl-k">from</span> <span class="pl-s1">scattertext</span> <span class="pl-k">import</span> <span class="pl-v">SampleCorpora</span>, <span class="pl-s1">word_similarity_explorer_gensim</span>, <span class="pl-v">Word2VecFromParsedCorpus</span> <span class="pl-k">from</span> <span class="pl-s1">scattertext</span>.<span 
class="pl-v">CorpusFromParsedDocuments</span> <span class="pl-k">import</span> <span class="pl-v">CorpusFromParsedDocuments</span> <span class="pl-s1">nlp</span> <span class="pl-c1">=</span> <span class="pl-s1">spacy</span>.<span class="pl-c1">en</span>.<span class="pl-c1">English</span>() <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-v">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">convention_df</span>[<span class="pl-s">'parsed'</span>] <span class="pl-c1">=</span> <span class="pl-s1">convention_df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">nlp</span>) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-en">CorpusFromParsedDocuments</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parsed'</span>).<span class="pl-c1">build</span>() <span class="pl-s1">model</span> <span class="pl-c1">=</span> <span class="pl-s1">word2vec</span>.<span class="pl-c1">Word2Vec</span>(<span class="pl-s1">size</span><span class="pl-c1">=</span><span class="pl-c1">300</span>, <span class="pl-s1">alpha</span><span class="pl-c1">=</span><span class="pl-c1">0.025</span>, <span class="pl-s1">window</span><span class="pl-c1">=</span><span class="pl-c1">5</span>, <span class="pl-s1">min_count</span><span class="pl-c1">=</span><span class="pl-c1">5</span>, <span class="pl-s1">max_vocab_size</span><span class="pl-c1">=</span><span class="pl-c1">None</span>, <span class="pl-s1">sample</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">seed</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">workers</span><span class="pl-c1">=</span><span 
class="pl-c1">1</span>, <span class="pl-s1">min_alpha</span><span class="pl-c1">=</span><span class="pl-c1">0.0001</span>, <span class="pl-s1">sg</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">hs</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">negative</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">cbow_mean</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">iter</span><span class="pl-c1">=</span><span class="pl-c1">1</span>, <span class="pl-s1">null_word</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">trim_rule</span><span class="pl-c1">=</span><span class="pl-c1">None</span>, <span class="pl-s1">sorted_vocab</span><span class="pl-c1">=</span><span class="pl-c1">1</span>) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-en">word_similarity_explorer_gensim</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">target_term</span><span class="pl-c1">=</span><span class="pl-s">'jobs'</span>, <span class="pl-s1">minimum_term_frequency</span><span class="pl-c1">=</span><span class="pl-c1">5</span>, <span class="pl-s1">pmi_threshold_coefficient</span><span class="pl-c1">=</span><span class="pl-c1">4</span>, <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>[<span class="pl-s">'speaker'</span>], <span class="pl-s1">word2vec</span><span class="pl-c1">=</span><span 
class="pl-en">Word2VecFromParsedCorpus</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">model</span>).<span class="pl-c1">train</span>(), <span class="pl-s1">max_p_val</span><span class="pl-c1">=</span><span class="pl-c1">0.05</span>, <span class="pl-s1">save_svg_button</span><span class="pl-c1">=</span><span class="pl-c1">True</span>) <span class="pl-en">open</span>(<span class="pl-s">'./demo_gensim_similarity.html'</span>, <span class="pl-s">'wb'</span>).<span class="pl-c1">write</span>(<span class="pl-s1">html</span>.<span class="pl-c1">encode</span>(<span class="pl-s">'utf-8'</span>))</pre></div> <p dir="auto">How Democrats and Republicans talked differently about "jobs" in their 2012 convention speeches. <a href="https://jasonkessler.github.io/demo_gensim_similarity.html" rel="nofollow"><img src="https://camo.githubusercontent.com/0755b8c2827ae7943593be421eca3c9050bdab4418b7de9ff7f06f3ac581cccf/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f67656e73696d5f73696d696c61726974792e706e67" alt="Convention-Visualization-Jobs.html" data-canonical-src="https://jasonkessler.github.io/demo_gensim_similarity.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing any kind of term score</h3><a id="user-content-visualizing-any-kind-of-term-score" class="anchor" aria-label="Permalink: Visualizing any kind of term score" href="#visualizing-any-kind-of-term-score"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 
1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">We can use Scattertext to visualize alternative types of word scores, and ensure that 0 scores are greyed out. Use the <code>sparse_explorer</code> function to accomplish this, and see its source code for more details.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; from sklearn.linear_model import Lasso &gt;&gt;&gt; from scattertext import sparse_explorer &gt;&gt;&gt; html = sparse_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... scores = corpus.get_regression_coefs('democrat', Lasso(max_iter=10000)), ... minimum_term_frequency=5, ... pmi_threshold_coefficient=4, ... width_in_pixels=1000, ... metadata=convention_df['speaker']) &gt;&gt;&gt; open('./Convention-Visualization-Sparse.html', 'wb').write(html.encode('utf-8'))"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; from sklearn.linear_model import Lasso &gt;&gt;&gt; from scattertext import sparse_explorer &gt;&gt;&gt; html = sparse_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... scores = corpus.get_regression_coefs('democrat', Lasso(max_iter=10000)), ... minimum_term_frequency=5, ... pmi_threshold_coefficient=4, ... width_in_pixels=1000, ... 
metadata=convention_df['speaker']) &gt;&gt;&gt; open('./Convention-Visualization-Sparse.html', 'wb').write(html.encode('utf-8')) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/Convention-Visualization-Sparse.html" rel="nofollow"><img src="https://camo.githubusercontent.com/d48e52dae93e96541a7a2f20e30f35c4d0dcb72f8de6fa3509ead651209bd710/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e2d56697375616c697a6174696f6e2d5370617273652e706e67" alt="Convention-Visualization-Sparse.html" data-canonical-src="https://jasonkessler.github.io/Convention-Visualization-Sparse.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Custom term positions</h3><a id="user-content-custom-term-positions" class="anchor" aria-label="Permalink: Custom term positions" href="#custom-term-positions"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">You can also use custom term positions and axis labels. For example, you can base terms' y-axis positions on a regression coefficient and their x-axis on term frequency and label the axes accordingly. 
The one catch is that axis positions must be scaled between 0 and 1.</p> <p dir="auto">First, let's define two scaling functions: <code>scale</code> to project positive values to [0,1], and <code>zero_centered_scale</code> to project real values to [0,1], with negative values always &lt;0.5, and positive values always &gt;0.5.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; def scale(ar): ... return (ar - ar.min()) / (ar.max() - ar.min()) ... &gt;&gt;&gt; def zero_centered_scale(ar): ... ar[ar &gt; 0] = scale(ar[ar &gt; 0]) ... ar[ar &lt; 0] = -scale(-ar[ar &lt; 0]) ... return (ar + 1) / 2."><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; def scale(ar): ... return (ar - ar.min()) / (ar.max() - ar.min()) ... &gt;&gt;&gt; def zero_centered_scale(ar): ... ar[ar &gt; 0] = scale(ar[ar &gt; 0]) ... ar[ar &lt; 0] = -scale(-ar[ar &lt; 0]) ... return (ar + 1) / 2. </code></pre></div> <p dir="auto">Next, let's compute and scale term frequencies and L2-penalized regression coefficients. We'll hang on to the original coefficients and allow users to view them by mousing over terms.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; from sklearn.linear_model import LogisticRegression &gt;&gt;&gt; import numpy as np &gt;&gt;&gt; &gt;&gt;&gt; frequencies_scaled = scale(np.log(term_freq_df.sum(axis=1).values)) &gt;&gt;&gt; scores = corpus.get_logreg_coefs('democrat', ... 
LogisticRegression(penalty='l2', C=10, max_iter=10000, n_jobs=-1)) &gt;&gt;&gt; scores_scaled = zero_centered_scale(scores)"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; from sklearn.linear_model import LogisticRegression &gt;&gt;&gt; import numpy as np &gt;&gt;&gt; &gt;&gt;&gt; frequencies_scaled = scale(np.log(term_freq_df.sum(axis=1).values)) &gt;&gt;&gt; scores = corpus.get_logreg_coefs('democrat', ... 
LogisticRegression(penalty='l2', C=10, max_iter=10000, n_jobs=-1)) &gt;&gt;&gt; scores_scaled = zero_centered_scale(scores) </code></pre></div> <p dir="auto">Finally, we can write the visualization. Note the use of the <code>x_coords</code> and <code>y_coords</code> parameters to store the respective coordinates, the <code>scores</code> and <code>sort_by_dist</code> arguments to register the original coefficients and use them to rank the terms in the right-hand list, and the <code>x_label</code> and <code>y_label</code> arguments to label axes.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; html = produce_scattertext_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... minimum_term_frequency=5, ... pmi_threshold_coefficient=4, ... width_in_pixels=1000, ... x_coords=frequencies_scaled, ... y_coords=scores_scaled, ... scores=scores, ... sort_by_dist=False, ... metadata=convention_df['speaker'], ... x_label='Log frequency', ... y_label='L2-penalized logistic regression coef') &gt;&gt;&gt; open('demo_custom_coordinates.html', 'wb').write(html.encode('utf-8'))"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; html = produce_scattertext_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... minimum_term_frequency=5, ... pmi_threshold_coefficient=4, ... width_in_pixels=1000, ... x_coords=frequencies_scaled, ... y_coords=scores_scaled, ... scores=scores, ... sort_by_dist=False, ... metadata=convention_df['speaker'], ... x_label='Log frequency', ... 
y_label='L2-penalized logistic regression coef') &gt;&gt;&gt; open('demo_custom_coordinates.html', 'wb').write(html.encode('utf-8')) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_custom_coordinates.html" rel="nofollow"><img src="https://camo.githubusercontent.com/0b5bc3e00051a092c9dd715486f8eeae2703783cae38cfc21b1f759f83a8e1cb/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f637573746f6d5f636f6f7264696e617465732e706e67" alt="demo_custom_coordinates.html" data-canonical-src="https://jasonkessler.github.io/demo_custom_coordinates.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Emoji analysis</h3><a id="user-content-emoji-analysis" class="anchor" aria-label="Permalink: Emoji analysis" href="#emoji-analysis"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">The Emoji analysis capability displays a chart of the category-specific distribution of Emoji. Let's look at a new corpus, a set of tweets. We'll build a visualization showing how men and women use emoji differently.</p> <p dir="auto">Note: the following example is implemented in <code>demo_emoji.py</code>.</p> <p dir="auto">First, we'll load the dataset and parse it using NLTK's tweet tokenizer. Note, install NLTK before running this example. 
It will take some time for the dataset to download.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import nltk, urllib.request, io, agefromname, zipfile import scattertext as st import pandas as pd with zipfile.ZipFile(io.BytesIO(urllib.request.urlopen( 'http://followthehashtag.com/content/uploads/USA-Geolocated-tweets-free-dataset-Followthehashtag.zip' ).read())) as zf: df = pd.read_excel(zf.open('dashboard_x_usa_x_filter_nativeretweets.xlsx')) nlp = st.tweet_tokenzier_factory(nltk.tokenize.TweetTokenizer()) df['parse'] = df['Tweet content'].apply(nlp) df.iloc[0] ''' Tweet Id 721318437075685382 Date 2016-04-16 Hour 12:44 User Name Bill Schulhoff Nickname BillSchulhoff Bio Husband,Dad,GrandDad,Ordained Minister, Umpire... Tweet content Wind 3.2 mph NNE. Barometer 30.20 in, Rising s... Favs NaN RTs NaN Latitude 40.7603 Longitude -72.9547 Country US Place (as appears on Bio) East Patchogue, NY Profile picture http://pbs.twimg.com/profile_images/3788000007... Followers 386 Following 705 Listed 24 Tweet language (ISO 639-1) en Tweet Url http://www.twitter.com/BillSchulhoff/status/72... parse Wind 3.2 mph NNE. Barometer 30.20 in, Rising s... 
Name: 0, dtype: object '''"><pre><span class="pl-k">import</span> <span class="pl-s1">nltk</span>, <span class="pl-s1">urllib</span>.<span class="pl-s1">request</span>, <span class="pl-s1">io</span>, <span class="pl-s1">agefromname</span>, <span class="pl-s1">zipfile</span> <span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-k">import</span> <span class="pl-s1">pandas</span> <span class="pl-k">as</span> <span class="pl-s1">pd</span> <span class="pl-k">with</span> <span class="pl-s1">zipfile</span>.<span class="pl-c1">ZipFile</span>(<span class="pl-s1">io</span>.<span class="pl-c1">BytesIO</span>(<span class="pl-s1">urllib</span>.<span class="pl-c1">request</span>.<span class="pl-c1">urlopen</span>( <span class="pl-s">'http://followthehashtag.com/content/uploads/USA-Geolocated-tweets-free-dataset-Followthehashtag.zip'</span> ).<span class="pl-c1">read</span>())) <span class="pl-k">as</span> <span class="pl-s1">zf</span>: <span class="pl-s1">df</span> <span class="pl-c1">=</span> <span class="pl-s1">pd</span>.<span class="pl-c1">read_excel</span>(<span class="pl-s1">zf</span>.<span class="pl-c1">open</span>(<span class="pl-s">'dashboard_x_usa_x_filter_nativeretweets.xlsx'</span>)) <span class="pl-s1">nlp</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">tweet_tokenzier_factory</span>(<span class="pl-s1">nltk</span>.<span class="pl-c1">tokenize</span>.<span class="pl-c1">TweetTokenizer</span>()) <span class="pl-s1">df</span>[<span class="pl-s">'parse'</span>] <span class="pl-c1">=</span> <span class="pl-s1">df</span>[<span class="pl-s">'Tweet content'</span>].<span class="pl-c1">apply</span>(<span class="pl-s1">nlp</span>) <span class="pl-s1">df</span>.<span class="pl-c1">iloc</span>[<span class="pl-c1">0</span>] <span class="pl-s">'''</span> <span class="pl-s">Tweet Id 721318437075685382</span> <span class="pl-s">Date 2016-04-16</span> 
<span class="pl-s">Hour 12:44</span> <span class="pl-s">User Name Bill Schulhoff</span> <span class="pl-s">Nickname BillSchulhoff</span> <span class="pl-s">Bio Husband,Dad,GrandDad,Ordained Minister, Umpire...</span> <span class="pl-s">Tweet content Wind 3.2 mph NNE. Barometer 30.20 in, Rising s...</span> <span class="pl-s">Favs NaN</span> <span class="pl-s">RTs NaN</span> <span class="pl-s">Latitude 40.7603</span> <span class="pl-s">Longitude -72.9547</span> <span class="pl-s">Country US</span> <span class="pl-s">Place (as appears on Bio) East Patchogue, NY</span> <span class="pl-s">Profile picture http://pbs.twimg.com/profile_images/3788000007...</span> <span class="pl-s">Followers 386</span> <span class="pl-s">Following 705</span> <span class="pl-s">Listed 24</span> <span class="pl-s">Tweet language (ISO 639-1) en</span> <span class="pl-s">Tweet Url http://www.twitter.com/BillSchulhoff/status/72...</span> <span class="pl-s">parse Wind 3.2 mph NNE. Barometer 30.20 in, Rising s...</span> <span class="pl-s">Name: 0, dtype: object</span> <span class="pl-s">'''</span></pre></div> <p dir="auto">Next, we'll use the <a href="https://github.com/JasonKessler/agefromname">AgeFromName</a> package to find the probabilities of the gender of each user given their first name. 
First, we'll find a dataframe indexed on first names that contains the probability that each someone with that first name is male (<code>male_prob</code>).</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="male_prob = agefromname.AgeFromName().get_all_name_male_prob() male_prob.iloc[0] ''' hi 1.00000 lo 0.95741 prob 1.00000 Name: aaban, dtype: float64 '''"><pre><span class="pl-s1">male_prob</span> <span class="pl-c1">=</span> <span class="pl-s1">agefromname</span>.<span class="pl-c1">AgeFromName</span>().<span class="pl-c1">get_all_name_male_prob</span>() <span class="pl-s1">male_prob</span>.<span class="pl-c1">iloc</span>[<span class="pl-c1">0</span>] <span class="pl-s">'''</span> <span class="pl-s">hi 1.00000</span> <span class="pl-s">lo 0.95741</span> <span class="pl-s">prob 1.00000</span> <span class="pl-s">Name: aaban, dtype: float64</span> <span class="pl-s">'''</span></pre></div> <p dir="auto">Next, we'll extract the first names of each user, and use the <code>male_prob</code> data frame to find users whose names indicate there is at least a 90% chance they are either male or female, label those users, and create new data frame <code>df_mf</code> with only those users.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="df['first_name'] = df['User Name'].apply(lambda x: x.split()[0].lower() if type(x) == str and len(x.split()) &gt; 0 else x) df_aug = pd.merge(df, male_prob, left_on='first_name', right_index=True) df_aug['gender'] = df_aug['prob'].apply(lambda x: 'm' if x &gt; 0.9 else 'f' if x &lt; 0.1 else '?') df_mf = df_aug[df_aug['gender'].isin(['m', 'f'])]"><pre><span class="pl-s1">df</span>[<span class="pl-s">'first_name'</span>] <span class="pl-c1">=</span> <span class="pl-s1">df</span>[<span class="pl-s">'User Name'</span>].<span class="pl-c1">apply</span>(<span 
class="pl-k">lambda</span> <span class="pl-s1">x</span>: <span class="pl-s1">x</span>.<span class="pl-c1">split</span>()[<span class="pl-c1">0</span>].<span class="pl-c1">lower</span>() <span class="pl-k">if</span> <span class="pl-en">type</span>(<span class="pl-s1">x</span>) <span class="pl-c1">==</span> <span class="pl-s1">str</span> <span class="pl-c1">and</span> <span class="pl-en">len</span>(<span class="pl-s1">x</span>.<span class="pl-c1">split</span>()) <span class="pl-c1">&gt;</span> <span class="pl-c1">0</span> <span class="pl-k">else</span> <span class="pl-s1">x</span>) <span class="pl-s1">df_aug</span> <span class="pl-c1">=</span> <span class="pl-s1">pd</span>.<span class="pl-c1">merge</span>(<span class="pl-s1">df</span>, <span class="pl-s1">male_prob</span>, <span class="pl-s1">left_on</span><span class="pl-c1">=</span><span class="pl-s">'first_name'</span>, <span class="pl-s1">right_index</span><span class="pl-c1">=</span><span class="pl-c1">True</span>) <span class="pl-s1">df_aug</span>[<span class="pl-s">'gender'</span>] <span class="pl-c1">=</span> <span class="pl-s1">df_aug</span>[<span class="pl-s">'prob'</span>].<span class="pl-c1">apply</span>(<span class="pl-k">lambda</span> <span class="pl-s1">x</span>: <span class="pl-s">'m'</span> <span class="pl-k">if</span> <span class="pl-s1">x</span> <span class="pl-c1">&gt;</span> <span class="pl-c1">0.9</span> <span class="pl-k">else</span> <span class="pl-s">'f'</span> <span class="pl-k">if</span> <span class="pl-s1">x</span> <span class="pl-c1">&lt;</span> <span class="pl-c1">0.1</span> <span class="pl-k">else</span> <span class="pl-s">'?'</span>) <span class="pl-s1">df_mf</span> <span class="pl-c1">=</span> <span class="pl-s1">df_aug</span>[<span class="pl-s1">df_aug</span>[<span class="pl-s">'gender'</span>].<span class="pl-c1">isin</span>([<span class="pl-s">'m'</span>, <span class="pl-s">'f'</span>])]</pre></div> <p dir="auto">The key to this analysis is to construct a corpus using only the 
emoji extractor <code>st.FeatsFromSpacyDocOnlyEmoji</code> which builds a corpus only from emoji and not from anything else.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="corpus = st.CorpusFromParsedDocuments( df_mf, parsed_col='parse', category_col='gender', feats_from_spacy_doc=st.FeatsFromSpacyDocOnlyEmoji() ).build()"><pre><span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>( <span class="pl-s1">df_mf</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'gender'</span>, <span class="pl-s1">feats_from_spacy_doc</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">FeatsFromSpacyDocOnlyEmoji</span>() ).<span class="pl-c1">build</span>()</pre></div> <p dir="auto">Next, we'll run this through a standard <code>produce_scattertext_explorer</code> visualization generation.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_scattertext_explorer( corpus, category='f', category_name='Female', not_category_name='Male', use_full_doc=True, term_ranker=st.OncePerDocFrequencyRanker, sort_by_dist=False, metadata=(df_mf['User Name'] + ' (@' + df_mf['Nickname'] + ') ' + df_mf['Date'].astype(str)), width_in_pixels=1000 ) open(&quot;EmojiGender.html&quot;, 'wb').write(html.encode('utf-8'))"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'f'</span>, <span class="pl-s1">category_name</span><span 
class="pl-c1">=</span><span class="pl-s">'Female'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Male'</span>, <span class="pl-s1">use_full_doc</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">term_ranker</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">OncePerDocFrequencyRanker</span>, <span class="pl-s1">sort_by_dist</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span>(<span class="pl-s1">df_mf</span>[<span class="pl-s">'User Name'</span>] <span class="pl-c1">+</span> <span class="pl-s">' (@'</span> <span class="pl-c1">+</span> <span class="pl-s1">df_mf</span>[<span class="pl-s">'Nickname'</span>] <span class="pl-c1">+</span> <span class="pl-s">') '</span> <span class="pl-c1">+</span> <span class="pl-s1">df_mf</span>[<span class="pl-s">'Date'</span>].<span class="pl-c1">astype</span>(<span class="pl-s1">str</span>)), <span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span> ) <span class="pl-en">open</span>(<span class="pl-s">"EmojiGender.html"</span>, <span class="pl-s">'wb'</span>).<span class="pl-c1">write</span>(<span class="pl-s1">html</span>.<span class="pl-c1">encode</span>(<span class="pl-s">'utf-8'</span>))</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/EmojiGender.html" rel="nofollow"><img src="https://camo.githubusercontent.com/839142c6951d1b32dfbf5c2754266ac5c73338a28cd97449dc674e764097feba/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f456d6f6a6947656e6465722e706e67" alt="EmojiGender.html" data-canonical-src="https://jasonkessler.github.io/EmojiGender.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing SentencePiece Tokens</h3><a id="user-content-visualizing-sentencepiece-tokens" 
class="anchor" aria-label="Permalink: Visualizing SentencePiece Tokens" href="#visualizing-sentencepiece-tokens"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto"><a href="https://github.com/google/sentencepiece">SentencePiece</a> tokenization is a subword tokenization technique which relies on a language-model to produce optimized tokenization. It has been used in large, transformer-based contextual language models.</p> <p dir="auto">Ensure to run <code>$ pip install sentencepiece</code> before running this example.</p> <p dir="auto">First, let's load the political convention data set as normal.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import tempfile import re import scattertext as st convention_df = st.SampleCorpora.ConventionData2012.get_data() convention_df['parse'] = convention_df.text.apply(st.whitespace_nlp_with_sentences)"><pre><span class="pl-k">import</span> <span class="pl-s1">tempfile</span> <span class="pl-k">import</span> <span class="pl-s1">re</span> <span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span 
class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">convention_df</span>[<span class="pl-s">'parse'</span>] <span class="pl-c1">=</span> <span class="pl-s1">convention_df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>)</pre></div> <p dir="auto">Next, let's train a SentencePiece tokenizer based on this data. The <code>train_sentence_piece_tokenizer</code> function trains a SentencePieceProcessor on the data set and returns it. You can of course use any SentencePieceProcessor.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content=" def train_sentence_piece_tokenizer(documents, vocab_size): ''' :param documents: list-like, a list of str documents :vocab_size int: the size of the vocabulary to output :return sentencepiece.SentencePieceProcessor ''' import sentencepiece as spm sp = None with tempfile.NamedTemporaryFile(delete=True) as tempf: with tempfile.NamedTemporaryFile(delete=True) as tempm: tempf.write(('\n'.join(documents)).encode()) spm.SentencePieceTrainer.Train( '--input=%s --model_prefix=%s --vocab_size=%s' % (tempf.name, tempm.name, vocab_size) ) sp = spm.SentencePieceProcessor() sp.load(tempm.name + '.model') return sp sp = train_sentence_piece_tokenizer(convention_df.text.values, vocab_size=2000) "><pre><span class="pl-k">def</span> <span class="pl-en">train_sentence_piece_tokenizer</span>(<span class="pl-s1">documents</span>, <span class="pl-s1">vocab_size</span>): <span class="pl-s">'''</span> <span class="pl-s"> :param documents: list-like, a list of str documents</span> <span class="pl-s"> :vocab_size int: the size of the vocabulary to output</span> <span class="pl-s"> </span> <span class="pl-s"> :return sentencepiece.SentencePieceProcessor</span> <span class="pl-s"> '''</span> <span class="pl-k">import</span> 
<span class="pl-s1">sentencepiece</span> <span class="pl-k">as</span> <span class="pl-s1">spm</span> <span class="pl-s1">sp</span> <span class="pl-c1">=</span> <span class="pl-c1">None</span> <span class="pl-k">with</span> <span class="pl-s1">tempfile</span>.<span class="pl-c1">NamedTemporaryFile</span>(<span class="pl-s1">delete</span><span class="pl-c1">=</span><span class="pl-c1">True</span>) <span class="pl-k">as</span> <span class="pl-s1">tempf</span>: <span class="pl-k">with</span> <span class="pl-s1">tempfile</span>.<span class="pl-c1">NamedTemporaryFile</span>(<span class="pl-s1">delete</span><span class="pl-c1">=</span><span class="pl-c1">True</span>) <span class="pl-k">as</span> <span class="pl-s1">tempm</span>: <span class="pl-s1">tempf</span>.<span class="pl-c1">write</span>((<span class="pl-s">'<span class="pl-cce">\n</span>'</span>.<span class="pl-c1">join</span>(<span class="pl-s1">documents</span>)).<span class="pl-c1">encode</span>()) <span class="pl-s1">spm</span>.<span class="pl-c1">SentencePieceTrainer</span>.<span class="pl-c1">Train</span>( <span class="pl-s">'--input=%s --model_prefix=%s --vocab_size=%s'</span> <span class="pl-c1">%</span> (<span class="pl-s1">tempf</span>.<span class="pl-c1">name</span>, <span class="pl-s1">tempm</span>.<span class="pl-c1">name</span>, <span class="pl-s1">vocab_size</span>) ) <span class="pl-s1">sp</span> <span class="pl-c1">=</span> <span class="pl-s1">spm</span>.<span class="pl-c1">SentencePieceProcessor</span>() <span class="pl-s1">sp</span>.<span class="pl-c1">load</span>(<span class="pl-s1">tempm</span>.<span class="pl-c1">name</span> <span class="pl-c1">+</span> <span class="pl-s">'.model'</span>) <span class="pl-k">return</span> <span class="pl-s1">sp</span> <span class="pl-s1">sp</span> <span class="pl-c1">=</span> <span class="pl-en">train_sentence_piece_tokenizer</span>(<span class="pl-s1">convention_df</span>.<span class="pl-c1">text</span>.<span class="pl-c1">values</span>, <span 
class="pl-s1">vocab_size</span><span class="pl-c1">=</span><span class="pl-c1">2000</span>)</pre></div> <p dir="auto">Next, let's add the SentencePiece tokens as metadata when creating our corpus. In order to do this, pass a <code>FeatsFromSentencePiece</code> instance into the <code>feats_from_spacy_doc</code> parameter. Pass the SentencePieceProcessor into the constructor.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="corpus = st.CorpusFromParsedDocuments(convention_df, parsed_col='parse', category_col='party', feats_from_spacy_doc=st.FeatsFromSentencePiece(sp)).build()"><pre><span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">feats_from_spacy_doc</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">FeatsFromSentencePiece</span>(<span class="pl-s1">sp</span>)).<span class="pl-c1">build</span>()</pre></div> <p dir="auto">Now we can create the SentencePiece token scatter plot.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_scattertext_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', sort_by_dist=False, metadata=convention_df['party'] + ': ' + convention_df['speaker'], term_scorer=st.RankDifference(), transform=st.Scalers.dense_rank, use_non_text_features=True, use_full_doc=True, )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span 
class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">sort_by_dist</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>[<span class="pl-s">'party'</span>] <span class="pl-c1">+</span> <span class="pl-s">': '</span> <span class="pl-c1">+</span> <span class="pl-s1">convention_df</span>[<span class="pl-s">'speaker'</span>], <span class="pl-s1">term_scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">RankDifference</span>(), <span class="pl-s1">transform</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">Scalers</span>.<span class="pl-c1">dense_rank</span>, <span class="pl-s1">use_non_text_features</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">use_full_doc</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_sentence_piece.html" rel="nofollow"><img src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_sentence_piece.png" alt="demo_sentence_piece.html" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing scikit-learn text classification weights</h3><a id="user-content-visualizing-scikit-learn-text-classification-weights" class="anchor" aria-label="Permalink: Visualizing scikit-learn text classification weights" 
href="#visualizing-scikit-learn-text-classification-weights"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Suppose you'd like to audit or better understand weights or importances given to bag-of-words features by a classifier.</p> <p dir="auto">It's easy to use Scattertext to do so, if you use a Scikit-learn-style classifier.</p> <p dir="auto">For example, the <a href="http://contrib.scikit-learn.org/lightning/" rel="nofollow">Lightning</a> package makes available high-performance linear classifiers which have Scikit-compatible interfaces.</p> <p dir="auto">First, let's import <code>sklearn</code>'s text feature extraction classes, the 20 Newsgroup corpus, Lightning's Primal Coordinate Descent classifier, and Scattertext. 
We'll also fetch the training portion of the Newsgroup corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="from lightning.classification import CDClassifier from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer import scattertext as st newsgroups_train = fetch_20newsgroups( subset='train', remove=('headers', 'footers', 'quotes') )"><pre><span class="pl-k">from</span> <span class="pl-s1">lightning</span>.<span class="pl-s1">classification</span> <span class="pl-k">import</span> <span class="pl-v">CDClassifier</span> <span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">datasets</span> <span class="pl-k">import</span> <span class="pl-s1">fetch_20newsgroups</span> <span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">feature_extraction</span>.<span class="pl-s1">text</span> <span class="pl-k">import</span> <span class="pl-v">CountVectorizer</span>, <span class="pl-v">TfidfVectorizer</span> <span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">newsgroups_train</span> <span class="pl-c1">=</span> <span class="pl-en">fetch_20newsgroups</span>( <span class="pl-s1">subset</span><span class="pl-c1">=</span><span class="pl-s">'train'</span>, <span class="pl-s1">remove</span><span class="pl-c1">=</span>(<span class="pl-s">'headers'</span>, <span class="pl-s">'footers'</span>, <span class="pl-s">'quotes'</span>) )</pre></div> <p dir="auto">Next, we'll tokenize our corpus twice. Once into tfidf features which will be used to train the classifier, and another time into ngram counts that will be used by Scattertext. 
It's important that both vectorizers share the same vocabulary, since we'll need to apply the weight vector from the model onto our Scattertext Corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="vectorizer = TfidfVectorizer() tfidf_X = vectorizer.fit_transform(newsgroups_train.data) count_vectorizer = CountVectorizer(vocabulary=vectorizer.vocabulary_)"><pre><span class="pl-s1">vectorizer</span> <span class="pl-c1">=</span> <span class="pl-en">TfidfVectorizer</span>() <span class="pl-s1">tfidf_X</span> <span class="pl-c1">=</span> <span class="pl-s1">vectorizer</span>.<span class="pl-c1">fit_transform</span>(<span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">data</span>) <span class="pl-s1">count_vectorizer</span> <span class="pl-c1">=</span> <span class="pl-en">CountVectorizer</span>(<span class="pl-s1">vocabulary</span><span class="pl-c1">=</span><span class="pl-s1">vectorizer</span>.<span class="pl-c1">vocabulary_</span>)</pre></div> <p dir="auto">Next, we use the <code>CorpusFromScikit</code> factory to build a Scattertext Corpus object. Ensure the <code>X</code> parameter is a document-by-feature matrix. The argument to the <code>y</code> parameter is an array of class labels. Each label is an integer representing a different news group. The <code>feature_vocabulary</code> is the vocabulary used by the vectorizers. The <code>category_names</code> is a list of the 20 newsgroup names, which serves as a class-label list. 
The <code>raw_texts</code> is a list of the newsgroup texts.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="corpus = st.CorpusFromScikit( X=count_vectorizer.fit_transform(newsgroups_train.data), y=newsgroups_train.target, feature_vocabulary=vectorizer.vocabulary_, category_names=newsgroups_train.target_names, raw_texts=newsgroups_train.data ).build()"><pre><span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromScikit</span>( <span class="pl-c1">X</span><span class="pl-c1">=</span><span class="pl-s1">count_vectorizer</span>.<span class="pl-c1">fit_transform</span>(<span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">data</span>), <span class="pl-s1">y</span><span class="pl-c1">=</span><span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">target</span>, <span class="pl-s1">feature_vocabulary</span><span class="pl-c1">=</span><span class="pl-s1">vectorizer</span>.<span class="pl-c1">vocabulary_</span>, <span class="pl-s1">category_names</span><span class="pl-c1">=</span><span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">target_names</span>, <span class="pl-s1">raw_texts</span><span class="pl-c1">=</span><span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">data</span> ).<span class="pl-c1">build</span>()</pre></div> <p dir="auto">Now, we can train the model on <code>tfidf_X</code> and the categorical response variable, and capture feature weights for category 0 ("alt.atheism").</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="clf = CDClassifier(penalty=&quot;l1/l2&quot;, loss=&quot;squared_hinge&quot;, multiclass=True, max_iter=20, alpha=1e-4, C=1.0 / tfidf_X.shape[0], tol=1e-3) clf.fit(tfidf_X, newsgroups_train.target) term_scores = clf.coef_[0]"><pre><span 
class="pl-s1">clf</span> <span class="pl-c1">=</span> <span class="pl-en">CDClassifier</span>(<span class="pl-s1">penalty</span><span class="pl-c1">=</span><span class="pl-s">"l1/l2"</span>, <span class="pl-s1">loss</span><span class="pl-c1">=</span><span class="pl-s">"squared_hinge"</span>, <span class="pl-s1">multiclass</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">max_iter</span><span class="pl-c1">=</span><span class="pl-c1">20</span>, <span class="pl-s1">alpha</span><span class="pl-c1">=</span><span class="pl-c1">1e-4</span>, <span class="pl-c1">C</span><span class="pl-c1">=</span><span class="pl-c1">1.0</span> <span class="pl-c1">/</span> <span class="pl-s1">tfidf_X</span>.<span class="pl-c1">shape</span>[<span class="pl-c1">0</span>], <span class="pl-s1">tol</span><span class="pl-c1">=</span><span class="pl-c1">1e-3</span>) <span class="pl-s1">clf</span>.<span class="pl-c1">fit</span>(<span class="pl-s1">tfidf_X</span>, <span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">target</span>) <span class="pl-s1">term_scores</span> <span class="pl-c1">=</span> <span class="pl-s1">clf</span>.<span class="pl-c1">coef_</span>[<span class="pl-c1">0</span>]</pre></div> <p dir="auto">Finally, we can create a Scattertext plot. 
We'll use the Monroe-style visualization, and automatically select around 4000 terms that encompass the set of frequent terms, terms with high absolute scores, and terms that are characteristic of the corpus.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="html = st.produce_frequency_explorer( corpus, 'alt.atheism', scores=term_scores, use_term_significance=False, terms_to_include=st.AutoTermSelector.get_selected_terms(corpus, term_scores, 4000), metadata=['/'.join(fn.split('/')[-2:]) for fn in newsgroups_train.filenames] )"><pre><span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_frequency_explorer</span>( <span class="pl-s1">corpus</span>, <span class="pl-s">'alt.atheism'</span>, <span class="pl-s1">scores</span><span class="pl-c1">=</span><span class="pl-s1">term_scores</span>, <span class="pl-s1">use_term_significance</span><span class="pl-c1">=</span><span class="pl-c1">False</span>, <span class="pl-s1">terms_to_include</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">AutoTermSelector</span>.<span class="pl-c1">get_selected_terms</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">term_scores</span>, <span class="pl-c1">4000</span>), <span class="pl-s1">metadata</span><span class="pl-c1">=</span>[<span class="pl-s">'/'</span>.<span class="pl-c1">join</span>(<span class="pl-s1">fn</span>.<span class="pl-c1">split</span>(<span class="pl-s">'/'</span>)[<span class="pl-c1">-</span><span class="pl-c1">2</span>:]) <span class="pl-k">for</span> <span class="pl-s1">fn</span> <span class="pl-c1">in</span> <span class="pl-s1">newsgroups_train</span>.<span class="pl-c1">filenames</span>] )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_sklearn.html" rel="nofollow"><img 
src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_sklearn.png" alt="demo_sklearn.html" style="max-width: 100%;"></a></p> <p dir="auto">Let's take a look at the performance of the classifier:</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="newsgroups_test = fetch_20newsgroups(subset='test', remove=('headers', 'footers', 'quotes')) X_test = vectorizer.transform(newsgroups_test.data) pred = clf.predict(X_test) f1 = f1_score(pred, newsgroups_test.target, average='micro') print(&quot;Microaveraged F1 score&quot;, f1)"><pre><span class="pl-s1">newsgroups_test</span> <span class="pl-c1">=</span> <span class="pl-en">fetch_20newsgroups</span>(<span class="pl-s1">subset</span><span class="pl-c1">=</span><span class="pl-s">'test'</span>, <span class="pl-s1">remove</span><span class="pl-c1">=</span>(<span class="pl-s">'headers'</span>, <span class="pl-s">'footers'</span>, <span class="pl-s">'quotes'</span>)) <span class="pl-v">X_test</span> <span class="pl-c1">=</span> <span class="pl-s1">vectorizer</span>.<span class="pl-c1">transform</span>(<span class="pl-s1">newsgroups_test</span>.<span class="pl-c1">data</span>) <span class="pl-s1">pred</span> <span class="pl-c1">=</span> <span class="pl-s1">clf</span>.<span class="pl-c1">predict</span>(<span class="pl-v">X_test</span>) <span class="pl-s1">f1</span> <span class="pl-c1">=</span> <span class="pl-en">f1_score</span>(<span class="pl-s1">pred</span>, <span class="pl-s1">newsgroups_test</span>.<span class="pl-c1">target</span>, <span class="pl-s1">average</span><span class="pl-c1">=</span><span class="pl-s">'micro'</span>) <span class="pl-en">print</span>(<span class="pl-s">"Microaveraged F1 score"</span>, <span class="pl-s1">f1</span>)</pre></div> <p dir="auto">Microaveraged F1 score 0.662108337759. 
Not bad over a ~0.05 baseline.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Creating lexicalized semiotic squares</h3><a id="user-content-creating-lexicalized-semiotic-squares" class="anchor" aria-label="Permalink: Creating lexicalized semiotic squares" href="#creating-lexicalized-semiotic-squares"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Please see <a href="http://www.signosemio.com/greimas/semiotic-square.asp" rel="nofollow">Signo</a> for an introduction to semiotic squares.</p> <p dir="auto">Some variants of the semiotic square-creator can be seen in this notebook, which studies words and phrases in headlines that had low or high Facebook engagement and were published by either BuzzFeed or the New York Times: [<a href="http://nbviewer.jupyter.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb" rel="nofollow">http://nbviewer.jupyter.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb</a>]</p> <p dir="auto">The idea behind the semiotic square is to express the relationship between two opposing concepts and other concepts within a larger domain of discourse. Examples of opposed concepts are life or death, male or female, or, in our example, positive or negative sentiment. 
Semiotic squares are comprised of four "corners": the upper two corners are the opposing concepts, while the bottom corners are the negation of the concepts.</p> <p dir="auto">Circumscribing the negation of a concept involves finding everything in the domain of discourse that isn't associated with the concept. For example, in the life-death opposition, one can consider the universe of discourse to be all animate beings, real and hypothetical. The not-alive category will cover dead things, but also hypothetical entities like fictional characters or sentient AIs.</p> <p dir="auto">In building lexicalized semiotic squares, we consider concepts to be documents labeled in a corpus. Documents, in this setting, can belong to one of three categories: two labels corresponding to the opposing concepts, and a neutral category, indicating a document is in the same domain as the opposition, but cannot fall into one of the opposing categories.</p> <p dir="auto">In the example below positive and negative movie reviews are treated as the opposing categories, while plot descriptions of the same movies are treated as the neutral category.</p> <p dir="auto">Terms associated with one of the two opposing categories (relative only to the other) are listed as being associated with that category. Terms associated with a neutral category (e.g., not positive) are terms which are associated with the disjunction of the opposite category and the neutral category. For example, not-positive terms are those most associated with the set of negative reviews and plot descriptions vs. positive reviews.</p> <p dir="auto">Common terms among adjacent corners of the square are also listed.</p> <p dir="auto">An HTML-rendered square is accompanied by a scatter plot. Points on the plot are terms. The x-axis is the Z-score of the association to one of the opposed concepts. The y-axis is the Z-score of how associated a term is with the neutral set of documents relative to the opposed set. 
A point's red-blue color indicates the term's opposed-association, while the more desaturated a term is, the more it is associated with the neutral set of documents.</p> <p dir="auto">Update to version 2.2: terms are colored by their nearest semiotic categories across the eight corresponding radial sectors.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import scattertext as st movie_df = st.SampleCorpora.RottenTomatoes.get_data() movie_df.category = movie_df.category.apply (lambda x: {'rotten': 'Negative', 'fresh': 'Positive', 'plot': 'Plot'}[x]) corpus = st.CorpusFromPandas( movie_df, category_col='category', text_col='text', nlp=st.whitespace_nlp_with_sentences ).build().get_unigram_corpus() semiotic_square = st.SemioticSquare( corpus, category_a='Positive', category_b='Negative', neutral_categories=['Plot'], scorer=st.RankDifference(), labels={'not_a_and_not_b': 'Plot Descriptions', 'a_and_b': 'Reviews'} ) html = st.produce_semiotic_square_explorer(semiotic_square, category_name='Positive', not_category_name='Negative', x_label='Fresh-Rotten', y_label='Plot-Review', neutral_category_name='Plot Description', metadata=movie_df['movie_name'])"><pre><span class="pl-k">import</span> <span class="pl-s1">scattertext</span> <span class="pl-k">as</span> <span class="pl-s1">st</span> <span class="pl-s1">movie_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">RottenTomatoes</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">movie_df</span>.<span class="pl-c1">category</span> <span class="pl-c1">=</span> <span class="pl-s1">movie_df</span>.<span class="pl-c1">category</span>.<span class="pl-c1">apply</span> (<span class="pl-k">lambda</span> <span class="pl-s1">x</span>: {<span class="pl-s">'rotten'</span>: <span class="pl-s">'Negative'</span>, <span class="pl-s">'fresh'</span>: <span 
class="pl-s">'Positive'</span>, <span class="pl-s">'plot'</span>: <span class="pl-s">'Plot'</span>}[<span class="pl-s1">x</span>]) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromPandas</span>( <span class="pl-s1">movie_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'category'</span>, <span class="pl-s1">text_col</span><span class="pl-c1">=</span><span class="pl-s">'text'</span>, <span class="pl-s1">nlp</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span> ).<span class="pl-c1">build</span>().<span class="pl-c1">get_unigram_corpus</span>() <span class="pl-s1">semiotic_square</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SemioticSquare</span>( <span class="pl-s1">corpus</span>, <span class="pl-s1">category_a</span><span class="pl-c1">=</span><span class="pl-s">'Positive'</span>, <span class="pl-s1">category_b</span><span class="pl-c1">=</span><span class="pl-s">'Negative'</span>, <span class="pl-s1">neutral_categories</span><span class="pl-c1">=</span>[<span class="pl-s">'Plot'</span>], <span class="pl-s1">scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">RankDifference</span>(), <span class="pl-s1">labels</span><span class="pl-c1">=</span>{<span class="pl-s">'not_a_and_not_b'</span>: <span class="pl-s">'Plot Descriptions'</span>, <span class="pl-s">'a_and_b'</span>: <span class="pl-s">'Reviews'</span>} ) <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_semiotic_square_explorer</span>(<span class="pl-s1">semiotic_square</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Positive'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span 
class="pl-s">'Negative'</span>, <span class="pl-s1">x_label</span><span class="pl-c1">=</span><span class="pl-s">'Fresh-Rotten'</span>, <span class="pl-s1">y_label</span><span class="pl-c1">=</span><span class="pl-s">'Plot-Review'</span>, <span class="pl-s1">neutral_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Plot Description'</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">movie_df</span>[<span class="pl-s">'movie_name'</span>])</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_semiotic.html" rel="nofollow"><img src="https://camo.githubusercontent.com/d90444553d3659e6515919f00b174504233fc7d85e7e19e999cda1bd6cb35124/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f73656d696f7469635f7371756172655f706c6f742e706e67" alt="semiotic square" data-canonical-src="https://jasonkessler.github.io/semiotic_square_plot.png" style="max-width: 100%;"></a></p> <p dir="auto">There are a number of other types of semiotic square construction functions. 
Again, please see <a href="https://nbviewer.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb" rel="nofollow">https://nbviewer.org/github/JasonKessler/PuPPyTalk/blob/master/notebooks/Explore-Headlines.ipynb</a> for an overview of these.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Visualizing Topic Models</h3><a id="user-content-visualizing-topic-models" class="anchor" aria-label="Permalink: Visualizing Topic Models" href="#visualizing-topic-models"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">A frequently requested feature of Scattertext has been the ability to visualize topic models. While this capability has existed in some forms (e.g., the Empath visualization), I've finally gotten around to implementing a concise API for such a visualization. There are three main ways to visualize topic models using Scattertext. The first is the simplest: manually entering topic models and visualizing them. The second uses a Scikit-Learn pipeline to produce the topic models for visualization. 
The third is a novel topic modeling technique, based on finding terms similar to a custom set of seed terms.</p> <div class="markdown-heading" dir="auto"><h4 tabindex="-1" class="heading-element" dir="auto">Manually entered topic models</h4><a id="user-content-manually-entered-topic-models" class="anchor" aria-label="Permalink: Manually entered topic models" href="#manually-entered-topic-models"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">If you have already created a topic model, simply structure it as a dictionary. This dictionary is keyed on strings which serve as topic titles and are displayed in the main scatterplot. The values are lists of words that belong to that topic. 
The words that are in each topic list are bolded when they appear in a snippet.</p> <p dir="auto">Note that currently, there is no support for keyword scores.</p> <p dir="auto">For example, one might manually enter the following topic models to explore in the Convention corpus:</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="topic_model = { 'money': ['money', 'bank', 'banks', 'finances', 'financial', 'loan', 'dollars', 'income'], 'jobs': ['jobs', 'workers', 'labor', 'employment', 'worker', 'employee', 'job'], 'patriotic': ['america', 'country', 'flag', 'americans', 'patriotism', 'patriotic'], 'family': ['mother', 'father', 'mom', 'dad', 'sister', 'brother', 'grandfather', 'grandmother', 'son', 'daughter'] }"><pre><span class="pl-s1">topic_model</span> <span class="pl-c1">=</span> { <span class="pl-s">'money'</span>: [<span class="pl-s">'money'</span>, <span class="pl-s">'bank'</span>, <span class="pl-s">'banks'</span>, <span class="pl-s">'finances'</span>, <span class="pl-s">'financial'</span>, <span class="pl-s">'loan'</span>, <span class="pl-s">'dollars'</span>, <span class="pl-s">'income'</span>], <span class="pl-s">'jobs'</span>: [<span class="pl-s">'jobs'</span>, <span class="pl-s">'workers'</span>, <span class="pl-s">'labor'</span>, <span class="pl-s">'employment'</span>, <span class="pl-s">'worker'</span>, <span class="pl-s">'employee'</span>, <span class="pl-s">'job'</span>], <span class="pl-s">'patriotic'</span>: [<span class="pl-s">'america'</span>, <span class="pl-s">'country'</span>, <span class="pl-s">'flag'</span>, <span class="pl-s">'americans'</span>, <span class="pl-s">'patriotism'</span>, <span class="pl-s">'patriotic'</span>], <span class="pl-s">'family'</span>: [<span class="pl-s">'mother'</span>, <span class="pl-s">'father'</span>, <span class="pl-s">'mom'</span>, <span class="pl-s">'dad'</span>, <span class="pl-s">'sister'</span>, <span 
class="pl-s">'brother'</span>, <span class="pl-s">'grandfather'</span>, <span class="pl-s">'grandmother'</span>, <span class="pl-s">'son'</span>, <span class="pl-s">'daughter'</span>] }</pre></div> <p dir="auto">We can use the <code>FeatsFromTopicModel</code> class to transform this topic model into one which can be visualized using Scattertext. This is used just like any other feature builder, and we pass the topic model object into <code>produce_scattertext_explorer</code>.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="import scattertext as st topic_feature_builder = st.FeatsFromTopicModel(topic_model) topic_corpus = st.CorpusFromParsedDocuments( convention_df, category_col='party', parsed_col='parse', feats_from_spacy_doc=topic_feature_builder ).build() html = st.produce_scattertext_explorer( topic_corpus, category='democrat', category_name='Democratic', not_category_name='Republican', width_in_pixels=1000, metadata=convention_df['speaker'], use_non_text_features=True, use_full_doc=True, pmi_threshold_coefficient=0, topic_model_term_lists=topic_feature_builder.get_top_model_term_lists() )"><pre class="notranslate"><code>import scattertext as st topic_feature_builder = st.FeatsFromTopicModel(topic_model) topic_corpus = st.CorpusFromParsedDocuments( convention_df, category_col='party', parsed_col='parse', feats_from_spacy_doc=topic_feature_builder ).build() html = st.produce_scattertext_explorer( topic_corpus, category='democrat', category_name='Democratic', not_category_name='Republican', width_in_pixels=1000, metadata=convention_df['speaker'], use_non_text_features=True, use_full_doc=True, pmi_threshold_coefficient=0, topic_model_term_lists=topic_feature_builder.get_top_model_term_lists() ) </code></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_custom_topic_model.html" rel="nofollow"><img 
src="https://raw.githubusercontent.com/JasonKessler/jasonkessler.github.io/master/demo_custom_topic_model.png" alt="demo_custom_topic_model.html" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h4 tabindex="-1" class="heading-element" dir="auto">Using Scikit-Learn for Topic Modeling</h4><a id="user-content-using-scikit-learn-for-topic-modeling" class="anchor" aria-label="Permalink: Using Scikit-Learn for Topic Modeling" href="#using-scikit-learn-for-topic-modeling"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Since topic modeling using document-level co-occurrence generally produces poor results, I've added a <code>SentencesForTopicModeling</code> class which allows clustering by co-occurrence at the sentence-level. It requires a <code>ParsedCorpus</code> object to be passed to its constructor, and creates a term-sentence matrix internally.</p> <p dir="auto">Next, you can create a topic model dictionary like the one above by passing in a Scikit-Learn clustering or dimensionality reduction pipeline. 
The only constraint is the last transformer in the pipeline must populate a <code>components_</code> attribute.</p> <p dir="auto">The <code>num_terms_per_topic</code> parameter specifies how many terms should be added to a list.</p> <p dir="auto">In the following example, we'll use NMF to cluster a stoplisted, unigram corpus of documents, and use the topic model dictionary to create a <code>FeatsFromTopicModel</code>, just like before.</p> <p dir="auto">Note that in <code>produce_scattertext_explorer</code>, we make the <code>topic_model_preview_size</code> 20 in order to show a preview of the first 20 terms in the topic in the snippet view as opposed to the default 10.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="from sklearn.decomposition import NMF from sklearn.feature_extraction.text import TfidfTransformer from sklearn.pipeline import Pipeline convention_df = st.SampleCorpora.ConventionData2012.get_data() convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences) unigram_corpus = (st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse') .build().get_stoplisted_unigram_corpus()) topic_model = st.SentencesForTopicModeling(unigram_corpus).get_topics_from_model( Pipeline([ ('tfidf', TfidfTransformer(sublinear_tf=True)), ('nmf', (NMF(n_components=100, alpha=.1, l1_ratio=.5, random_state=0))) ]), num_terms_per_topic=20 ) topic_feature_builder = st.FeatsFromTopicModel(topic_model) topic_corpus = st.CorpusFromParsedDocuments( convention_df, category_col='party', parsed_col='parse', feats_from_spacy_doc=topic_feature_builder ).build() html = st.produce_scattertext_explorer( topic_corpus, category='democrat', category_name='Democratic', not_category_name='Republican', width_in_pixels=1000, metadata=convention_df['speaker'], use_non_text_features=True, use_full_doc=True, pmi_threshold_coefficient=0, 
topic_model_term_lists=topic_feature_builder.get_top_model_term_lists(), topic_model_preview_size=20 )"><pre><span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">decomposition</span> <span class="pl-k">import</span> <span class="pl-c1">NMF</span> <span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">feature_extraction</span>.<span class="pl-s1">text</span> <span class="pl-k">import</span> <span class="pl-v">TfidfTransformer</span> <span class="pl-k">from</span> <span class="pl-s1">sklearn</span>.<span class="pl-s1">pipeline</span> <span class="pl-k">import</span> <span class="pl-v">Pipeline</span> <span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">convention_df</span>[<span class="pl-s">'parse'</span>] <span class="pl-c1">=</span> <span class="pl-s1">convention_df</span>[<span class="pl-s">'text'</span>].<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) <span class="pl-s1">unigram_corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>) .<span class="pl-c1">build</span>().<span class="pl-c1">get_stoplisted_unigram_corpus</span>()) <span class="pl-s1">topic_model</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SentencesForTopicModeling</span>(<span class="pl-s1">unigram_corpus</span>).<span class="pl-c1">get_topics_from_model</span>( <span class="pl-en">Pipeline</span>([ (<span 
class="pl-s">'tfidf'</span>, <span class="pl-en">TfidfTransformer</span>(<span class="pl-s1">sublinear_tf</span><span class="pl-c1">=</span><span class="pl-c1">True</span>)), (<span class="pl-s">'nmf'</span>, (<span class="pl-en">NMF</span>(<span class="pl-s1">n_components</span><span class="pl-c1">=</span><span class="pl-c1">100</span>, <span class="pl-s1">alpha</span><span class="pl-c1">=</span><span class="pl-c1">.1</span>, <span class="pl-s1">l1_ratio</span><span class="pl-c1">=</span><span class="pl-c1">.5</span>, <span class="pl-s1">random_state</span><span class="pl-c1">=</span><span class="pl-c1">0</span>))) ]), <span class="pl-s1">num_terms_per_topic</span><span class="pl-c1">=</span><span class="pl-c1">20</span> ) <span class="pl-s1">topic_feature_builder</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">FeatsFromTopicModel</span>(<span class="pl-s1">topic_model</span>) <span class="pl-s1">topic_corpus</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>( <span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>, <span class="pl-s1">feats_from_spacy_doc</span><span class="pl-c1">=</span><span class="pl-s1">topic_feature_builder</span> ).<span class="pl-c1">build</span>() <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_scattertext_explorer</span>( <span class="pl-s1">topic_corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, 
<span class="pl-s1">width_in_pixels</span><span class="pl-c1">=</span><span class="pl-c1">1000</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>[<span class="pl-s">'speaker'</span>], <span class="pl-s1">use_non_text_features</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">use_full_doc</span><span class="pl-c1">=</span><span class="pl-c1">True</span>, <span class="pl-s1">pmi_threshold_coefficient</span><span class="pl-c1">=</span><span class="pl-c1">0</span>, <span class="pl-s1">topic_model_term_lists</span><span class="pl-c1">=</span><span class="pl-s1">topic_feature_builder</span>.<span class="pl-c1">get_top_model_term_lists</span>(), <span class="pl-s1">topic_model_preview_size</span><span class="pl-c1">=</span><span class="pl-c1">20</span> )</pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_nmf_topic_model.html" rel="nofollow"><img src="https://camo.githubusercontent.com/be5122831e504d8d1e6a86f60161cd0b5316c8f49600eb53d73314ac2f01ba6e/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f6e6d665f746f7069635f6d6f64656c2e706e67" alt="demo_nmf_topic_model.html" data-canonical-src="https://jasonkessler.github.io/demo_nmf_topic_model.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h4 tabindex="-1" class="heading-element" dir="auto">Using a Word List to Generate a Series of Topics</h4><a id="user-content-using-a-word-list-to-generate-a-series-of-topics" class="anchor" aria-label="Permalink: Using a Word List to Generate a Series of Topics" href="#using-a-word-list-to-generate-a-series-of-topics"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 
0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">A surprisingly easy way to generate good topic models is to use a term scoring formula to find words that are associated with sentences where a seed word occurs vs. where one doesn't occur.</p> <p dir="auto">Given a custom term list, the <code>SentencesForTopicModeling.get_topics_from_terms</code> will generate a series of topics. Note that the dense rank difference (<code>RankDifference</code>) works particularly well for this task, and is the default parameter.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="term_list = ['obama', 'romney', 'democrats', 'republicans', 'health', 'military', 'taxes', 'education', 'olympics', 'auto', 'iraq', 'iran', 'israel'] unigram_corpus = (st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse') .build().get_stoplisted_unigram_corpus()) topic_model = (st.SentencesForTopicModeling(unigram_corpus) .get_topics_from_terms(term_list, scorer=st.RankDifference(), num_terms_per_topic=20)) topic_feature_builder = st.FeatsFromTopicModel(topic_model) # The remaining code is identical to two examples above. 
See demo_word_list_topic_model.py # for the complete example."><pre><span class="pl-s1">term_list</span> <span class="pl-c1">=</span> [<span class="pl-s">'obama'</span>, <span class="pl-s">'romney'</span>, <span class="pl-s">'democrats'</span>, <span class="pl-s">'republicans'</span>, <span class="pl-s">'health'</span>, <span class="pl-s">'military'</span>, <span class="pl-s">'taxes'</span>, <span class="pl-s">'education'</span>, <span class="pl-s">'olympics'</span>, <span class="pl-s">'auto'</span>, <span class="pl-s">'iraq'</span>, <span class="pl-s">'iran'</span>, <span class="pl-s">'israel'</span>] <span class="pl-s1">unigram_corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>) .<span class="pl-c1">build</span>().<span class="pl-c1">get_stoplisted_unigram_corpus</span>()) <span class="pl-s1">topic_model</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">SentencesForTopicModeling</span>(<span class="pl-s1">unigram_corpus</span>) .<span class="pl-c1">get_topics_from_terms</span>(<span class="pl-s1">term_list</span>, <span class="pl-s1">scorer</span><span class="pl-c1">=</span><span class="pl-s1">st</span>.<span class="pl-c1">RankDifference</span>(), <span class="pl-s1">num_terms_per_topic</span><span class="pl-c1">=</span><span class="pl-c1">20</span>)) <span class="pl-s1">topic_feature_builder</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">FeatsFromTopicModel</span>(<span class="pl-s1">topic_model</span>) <span class="pl-c"># The remaining code is identical to two examples above. 
See demo_word_list_topic_model.py</span> <span class="pl-c"># for the complete example.</span></pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_word_list_topic_model.html" rel="nofollow"><img src="https://camo.githubusercontent.com/e913957605561125d453405dbb5fea90e88d29e942e8632bd5deb668fb3d46d1/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f776f72645f6c6973745f746f7069635f6d6f64656c2e706e67" alt="demo_word_list_topic_model.html" data-canonical-src="https://jasonkessler.github.io/demo_word_list_topic_model.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Creating T-SNE-style word embedding projection plots</h3><a id="user-content-creating-t-sne-style-word-embedding-projection-plots" class="anchor" aria-label="Permalink: Creating T-SNE-style word embedding projection plots" href="#creating-t-sne-style-word-embedding-projection-plots"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Scattertext makes it easy to create word-similarity plots using projections of word embeddings as the x and y-axes. In the example below, we create a stop-listed Corpus with only unigram terms. 
The <code>produce_projection_explorer</code> function uses Gensim to create word embeddings and then projects them to two dimensions using Uniform Manifold Approximation and Projection (UMAP).</p> <p dir="auto">UMAP is chosen over T-SNE because it can employ the cosine similarity between two word vectors instead of just the euclidean distance.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="convention_df = st.SampleCorpora.ConventionData2012.get_data() convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences) corpus = (st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse') .build().get_stoplisted_unigram_corpus()) html = st.produce_projection_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', metadata=convention_df.speaker)"><pre><span class="pl-s1">convention_df</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">SampleCorpora</span>.<span class="pl-c1">ConventionData2012</span>.<span class="pl-c1">get_data</span>() <span class="pl-s1">convention_df</span>[<span class="pl-s">'parse'</span>] <span class="pl-c1">=</span> <span class="pl-s1">convention_df</span>[<span class="pl-s">'text'</span>].<span class="pl-c1">apply</span>(<span class="pl-s1">st</span>.<span class="pl-c1">whitespace_nlp_with_sentences</span>) <span class="pl-s1">corpus</span> <span class="pl-c1">=</span> (<span class="pl-s1">st</span>.<span class="pl-c1">CorpusFromParsedDocuments</span>(<span class="pl-s1">convention_df</span>, <span class="pl-s1">category_col</span><span class="pl-c1">=</span><span class="pl-s">'party'</span>, <span class="pl-s1">parsed_col</span><span class="pl-c1">=</span><span class="pl-s">'parse'</span>) .<span class="pl-c1">build</span>().<span class="pl-c1">get_stoplisted_unigram_corpus</span>()) <span class="pl-s1">html</span> <span 
class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_projection_explorer</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>.<span class="pl-c1">speaker</span>)</pre></div> <p dir="auto">In order to use custom word embedding functions or projection functions, pass models into the <code>word2vec_model</code> and <code>projection_model</code> parameters. In order to use T-SNE, for example, use <code>projection_model=sklearn.manifold.TSNE()</code>.</p> <div class="highlight highlight-source-python notranslate position-relative overflow-auto" dir="auto" data-snippet-clipboard-copy-content="import umap from gensim.models.word2vec import Word2Vec html = st.produce_projection_explorer(corpus, word2vec_model=Word2Vec(size=100, window=5, min_count=10, workers=4), projection_model=umap.UMAP(min_dist=0.5, metric='cosine'), category='democrat', category_name='Democratic', not_category_name='Republican', metadata=convention_df.speaker) "><pre><span class="pl-k">import</span> <span class="pl-s1">umap</span> <span class="pl-k">from</span> <span class="pl-s1">gensim</span>.<span class="pl-s1">models</span>.<span class="pl-s1">word2vec</span> <span class="pl-k">import</span> <span class="pl-v">Word2Vec</span> <span class="pl-s1">html</span> <span class="pl-c1">=</span> <span class="pl-s1">st</span>.<span class="pl-c1">produce_projection_explorer</span>(<span class="pl-s1">corpus</span>, <span class="pl-s1">word2vec_model</span><span class="pl-c1">=</span><span class="pl-en">Word2Vec</span>(<span class="pl-s1">size</span><span 
class="pl-c1">=</span><span class="pl-c1">100</span>, <span class="pl-s1">window</span><span class="pl-c1">=</span><span class="pl-c1">5</span>, <span class="pl-s1">min_count</span><span class="pl-c1">=</span><span class="pl-c1">10</span>, <span class="pl-s1">workers</span><span class="pl-c1">=</span><span class="pl-c1">4</span>), <span class="pl-s1">projection_model</span><span class="pl-c1">=</span><span class="pl-s1">umap</span>.<span class="pl-c1">UMAP</span>(<span class="pl-s1">min_dist</span><span class="pl-c1">=</span><span class="pl-c1">0.5</span>, <span class="pl-s1">metric</span><span class="pl-c1">=</span><span class="pl-s">'cosine'</span>), <span class="pl-s1">category</span><span class="pl-c1">=</span><span class="pl-s">'democrat'</span>, <span class="pl-s1">category_name</span><span class="pl-c1">=</span><span class="pl-s">'Democratic'</span>, <span class="pl-s1">not_category_name</span><span class="pl-c1">=</span><span class="pl-s">'Republican'</span>, <span class="pl-s1">metadata</span><span class="pl-c1">=</span><span class="pl-s1">convention_df</span>.<span class="pl-c1">speaker</span>) </pre></div> <p dir="auto"><a href="https://jasonkessler.github.io/demo_tsne_style.html" rel="nofollow"><img src="https://camo.githubusercontent.com/e1ef8a377a9b14fba6e0094d2c518ed479d4db638204c11902456c430b66149c/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f74736e655f7374796c652e706e67" alt="t-sne style plot" data-canonical-src="https://jasonkessler.github.io/demo_tsne_style.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Using SVD to visualize any kind of word embeddings</h3><a id="user-content-using-svd-to-visualize-any-kind-of-word-embeddings" class="anchor" aria-label="Permalink: Using SVD to visualize any kind of word embeddings" href="#using-svd-to-visualize-any-kind-of-word-embeddings"><svg class="octicon octicon-link" viewBox="0 0 16 16" 
version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Term positions can also be determined by the positions of terms according to the output of principal component analysis, and <code>produce_projection_explorer</code> also supports this functionality. We'll look at how axes transformations ("scalers" in Scattertext terminology) can make it easier to inspect the output of PCA.</p> <p dir="auto">We'll use the 2012 Conventions corpus for these visualizations. Only unigrams occurring in at least three documents will be considered.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() &gt;&gt;&gt; convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences) &gt;&gt;&gt; corpus = (st.CorpusFromParsedDocuments(convention_df, ... category_col='party', ... parsed_col='parse') ... .build() ... .get_stoplisted_unigram_corpus() ... .remove_infrequent_words(minimum_term_count=3, term_ranker=st.OncePerDocFrequencyRanker))"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; convention_df = st.SampleCorpora.ConventionData2012.get_data() &gt;&gt;&gt; convention_df['parse'] = convention_df['text'].apply(st.whitespace_nlp_with_sentences) &gt;&gt;&gt; corpus = (st.CorpusFromParsedDocuments(convention_df, ... category_col='party', ... 
parsed_col='parse') ... .build() ... .get_stoplisted_unigram_corpus() ... .remove_infrequent_words(minimum_term_count=3, term_ranker=st.OncePerDocFrequencyRanker)) </code></pre></div> <p dir="auto">Next, we use scikit-learn's tf-idf transformer to find very simple, sparse embeddings for all of these words. Since we input a #docs x #terms matrix to the transformer, we can transpose it to get a proper term-embeddings matrix, where each row corresponds to a term, and the columns correspond to document-specific tf-idf scores.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; from sklearn.feature_extraction.text import TfidfTransformer &gt;&gt;&gt; embeddings = TfidfTransformer().fit_transform(corpus.get_term_doc_mat()) &gt;&gt;&gt; embeddings.shape (189, 2159) &gt;&gt;&gt; corpus.get_num_docs(), corpus.get_num_terms() (189, 2159) &gt;&gt;&gt; embeddings = embeddings.T &gt;&gt;&gt; embeddings.shape (2159, 189)"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; from sklearn.feature_extraction.text import TfidfTransformer &gt;&gt;&gt; embeddings = TfidfTransformer().fit_transform(corpus.get_term_doc_mat()) &gt;&gt;&gt; embeddings.shape (189, 2159) &gt;&gt;&gt; corpus.get_num_docs(), corpus.get_num_terms() (189, 2159) &gt;&gt;&gt; embeddings = embeddings.T &gt;&gt;&gt; embeddings.shape (2159, 189) </code></pre></div> <p dir="auto">Given these sparse embeddings, we can apply sparse singular value decomposition to extract three factors. SVD factorizes the term embeddings matrix into three matrices, U, Σ, and VT. 
Importantly, the matrix U provides the singular values for each term, and VT provides them for each document, and Σ is a vector of the singular values.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; from scipy.sparse.linalg import svds &gt;&gt;&gt; U, S, VT = svds(embeddings, k = 3, maxiter=20000, which='LM') &gt;&gt;&gt; U.shape (2159, 3) &gt;&gt;&gt; S.shape (3,) &gt;&gt;&gt; VT.shape (3, 189)"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; from scipy.sparse.linalg import svds &gt;&gt;&gt; U, S, VT = svds(embeddings, k = 3, maxiter=20000, which='LM') &gt;&gt;&gt; U.shape (2159, 3) &gt;&gt;&gt; S.shape (3,) &gt;&gt;&gt; VT.shape (3, 189) </code></pre></div> <p dir="auto">We'll look at the first two singular values, plotting each term such that the x-axis position is the first singular value, and the y-axis term is the second. To do this, we make a "projection" data frame, where the <code>x</code> and <code>y</code> columns store the first two singular values, and key the data frame on each term. This controls the term positions on the chart.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; x_dim = 0; y_dim = 1; &gt;&gt;&gt; projection = pd.DataFrame({'term':corpus.get_terms(), ... 'x':U.T[x_dim], ... 'y':U.T[y_dim]}).set_index('term')"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; x_dim = 0; y_dim = 1; &gt;&gt;&gt; projection = pd.DataFrame({'term':corpus.get_terms(), ... 'x':U.T[x_dim], ... 'y':U.T[y_dim]}).set_index('term') </code></pre></div> <p dir="auto">We'll use the <code>produce_pca_explorer</code> function to visualize these. 
Note we include the projection object, and specify which singular values were used for x and y (<code>x_dim</code> and <code>y_dim</code>) so they can be labeled in the interactive visualization.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="html = st.produce_pca_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', projection=projection, metadata=convention_df['speaker'], width_in_pixels=1000, x_dim=x_dim, y_dim=y_dim)"><pre lang="pydocstring" class="notranslate"><code>html = st.produce_pca_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', projection=projection, metadata=convention_df['speaker'], width_in_pixels=1000, x_dim=x_dim, y_dim=y_dim) </code></pre></div> <p dir="auto">Click for an interactive visualization.<br> <a href="https://jasonkessler.github.io/demo_embeddings_svd_0_1.html" rel="nofollow"><img src="https://camo.githubusercontent.com/b1112c4d589e3e1d2729b3c8f98b16921fb3e60c0fe850ee3946b46f737f27b9/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f737664312e706e67" alt="pca" data-canonical-src="https://jasonkessler.github.io/svd1.png" style="max-width: 100%;"></a></p> <p dir="auto">We can easily re-scale the plot in order to make more efficient use of space. 
For example, passing in <code>scaler=scale_neg_1_to_1_with_zero_mean</code> will make all four quadrants take equal area.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="html = st.produce_pca_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', projection=projection, metadata=convention_df['speaker'], width_in_pixels=1000, scaler=st.scale_neg_1_to_1_with_zero_mean, x_dim=x_dim, y_dim=y_dim)"><pre lang="pydocstring" class="notranslate"><code>html = st.produce_pca_explorer(corpus, category='democrat', category_name='Democratic', not_category_name='Republican', projection=projection, metadata=convention_df['speaker'], width_in_pixels=1000, scaler=st.scale_neg_1_to_1_with_zero_mean, x_dim=x_dim, y_dim=y_dim) </code></pre></div> <p dir="auto">Click for an interactive visualization.<br> <a href="https://jasonkessler.github.io/demo_embeddings_svd_0_1_scale_neg_1_to_1_with_zero_mean.html" rel="nofollow"><img src="https://camo.githubusercontent.com/6f7a8b248e2e50203363f75fa8e33794f4677c38f18fe42a2d093a2155fc0750/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f737664322e706e67" alt="pca" data-canonical-src="https://jasonkessler.github.io/svd2.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">Exporting plot to matplotlib</h3><a id="user-content-exporting-plot-to-matplotlib" class="anchor" aria-label="Permalink: Exporting plot to matplotlib" href="#exporting-plot-to-matplotlib"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 
1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">To export the content of a scattertext explorer object (ScattertextStructure) to matplotlib you can use <code>produce_scattertext_pyplot</code>. The function returns a <code>matplotlib.figure.Figure</code> object which can be visualized using <code>plt.show</code> or <code>plt.savefig</code> as in the example below.</p> <p dir="auto">Note that installation of textalloc==0.0.3 and matplotlib&gt;=3.6.0 is required before running this.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="convention_df = st.SampleCorpora.ConventionData2012.get_data().assign( parse = lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ) corpus = st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse').build() scattertext_structure = st.produce_scattertext_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=5, pmi_threshold_coefficient=8, width_in_pixels=1000, return_scatterplot_structure=True, ) fig = st.produce_scattertext_pyplot(scattertext_structure) fig.savefig('pyplot_export.png', format='png')"><pre lang="pydocstring" class="notranslate"><code>convention_df = st.SampleCorpora.ConventionData2012.get_data().assign( parse = lambda df: df.text.apply(st.whitespace_nlp_with_sentences) ) corpus = st.CorpusFromParsedDocuments(convention_df, category_col='party', parsed_col='parse').build() scattertext_structure = st.produce_scattertext_explorer( corpus, category='democrat', category_name='Democratic', not_category_name='Republican', minimum_term_frequency=5, pmi_threshold_coefficient=8, width_in_pixels=1000, 
return_scatterplot_structure=True, ) fig = st.produce_scattertext_pyplot(scattertext_structure) fig.savefig('pyplot_export.png', format='png') </code></pre></div> <p dir="auto">[<a target="_blank" rel="noopener noreferrer nofollow" href="https://camo.githubusercontent.com/0e6b7f37dbfe4f4e50ace8cf8a9a759caf0fafb44665035c7163e03e1148f8e8/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f7079706c6f745f6578706f72742e706e67"><img src="https://camo.githubusercontent.com/0e6b7f37dbfe4f4e50ace8cf8a9a759caf0fafb44665035c7163e03e1148f8e8/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f7079706c6f745f6578706f72742e706e67" alt="pyplot" data-canonical-src="https://jasonkessler.github.io/pyplot_export.png" style="max-width: 100%;"></a>]</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Examples</h2><a id="user-content-examples" class="anchor" aria-label="Permalink: Examples" href="#examples"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Please see the examples in the <a href="https://github.com/JasonKessler/Scattertext-PyData">PyData 2017 Tutorial</a> on Scattertext.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">A note on chart layout</h2><a id="user-content-a-note-on-chart-layout" class="anchor" aria-label="Permalink: A note on chart 
layout" href="#a-note-on-chart-layout"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto"><a href="https://github.com/uwplse/cozy">Cozy: The Collection Synthesizer</a> (Loncaric 2016) was used to help determine which terms could be labeled without overlapping a circle or another label. It automatically built a data structure to efficiently store and query the locations of each circle and labeled term.</p> <p dir="auto">The script to build <code>rectangle-holder.js</code> was</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="fields ax1 : long, ay1 : long, ax2 : long, ay2 : long assume ax1 &lt; ax2 and ay1 &lt; ay2 query findMatchingRectangles(bx1 : long, by1 : long, bx2 : long, by2 : long) assume bx1 &lt; bx2 and by1 &lt; by2 ax1 &lt; bx2 and ax2 &gt; bx1 and ay1 &lt; by2 and ay2 &gt; by1"><pre class="notranslate"><code>fields ax1 : long, ay1 : long, ax2 : long, ay2 : long assume ax1 &lt; ax2 and ay1 &lt; ay2 query findMatchingRectangles(bx1 : long, by1 : long, bx2 : long, by2 : long) assume bx1 &lt; bx2 and by1 &lt; by2 ax1 &lt; bx2 and ax2 &gt; bx1 and ay1 &lt; by2 and ay2 &gt; by1 </code></pre></div> <p dir="auto">And it was called using</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="$ python2.7 
src/main.py &lt;script file name&gt; --enable-volume-trees \ --js-class RectangleHolder --enable-hamt --enable-arrays --js rectangle_holder.js"><pre class="notranslate"><code>$ python2.7 src/main.py &lt;script file name&gt; --enable-volume-trees \ --js-class RectangleHolder --enable-hamt --enable-arrays --js rectangle_holder.js </code></pre></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">What's new</h2><a id="user-content-whats-new" class="anchor" aria-label="Permalink: What's new" href="#whats-new"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.64</h2><a id="user-content-00264" class="anchor" aria-label="Permalink: 0.0.2.64" href="#00264"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 
1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Adding in code to ensure that term statistics will show up even if no documents are present in visualization.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.60</h2><a id="user-content-00260" class="anchor" aria-label="Permalink: 0.0.2.60" href="#00260"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Better axis labeling (see demo_axis_crossbars_and_labels.py).</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.59</h2><a id="user-content-00259" class="anchor" aria-label="Permalink: 0.0.2.59" href="#00259"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 
2.83Z"></path></svg></a></div> <p dir="auto">Pytextrank compatibility</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.57-58</h2><a id="user-content-00257-58" class="anchor" aria-label="Permalink: 0.0.2.57-58" href="#00257-58"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Ensuring Pandas 1.0 compatibility fixing Issue #51 and scikit-learn stopwords import issue in #49.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.44:</h2><a id="user-content-00244" class="anchor" aria-label="Permalink: 0.0.2.44:" href="#00244"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Added the following classes to support rank-based 
feature-selection: <code>AssociationCompactorByRank</code>, <code>TermCategoryRanker</code>.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.43:</h2><a id="user-content-00243" class="anchor" aria-label="Permalink: 0.0.2.43:" href="#00243"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Made the term pop-up box on the category pairplot only the category name</li> <li>Fixed optimal projection search function</li> <li>Merged PR from @millengustavo to fix when a FutureWarning is issued every time the get_background_frequency_df is called.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.42:</h2><a id="user-content-00242" class="anchor" aria-label="Permalink: 0.0.2.42:" href="#00242"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 
1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Fixed clickability of terms, coloring in certain plots</li> <li>Added initial number of terms to show in pairplot, using the <code>terms_to_show</code> parameter</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.41:</h2><a id="user-content-00241" class="anchor" aria-label="Permalink: 0.0.2.41:" href="#00241"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Enabled changing protocol in pair plot</li> <li>Fixed semiotic square creator</li> <li>Added <code>use_categories_as_metadata_and_replace_terms</code> to <code>TermDocMatrix</code>.</li> <li>Added <code>get_metadata_doc_count_df</code> and <code>get_metadata_count_mat</code> to TermDocMatrix</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.40:</h2><a id="user-content-00240" class="anchor" aria-label="Permalink: 0.0.2.40:" href="#00240"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 
0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Added categories to terms in pair plot halo, made them clickable</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.39:</h2><a id="user-content-00239" class="anchor" aria-label="Permalink: 0.0.2.39:" href="#00239"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Fixing failing test case</li> <li>Adding halo to pair plot</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.38:</h2><a id="user-content-00238" class="anchor" aria-label="Permalink: 0.0.2.38:" href="#00238"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 
9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Fixed term preview/clickability in semiotic square plots</li> <li>Fixed search box</li> <li>Added preliminary <code>produce_pairplot</code></li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.37:</h2><a id="user-content-00237" class="anchor" aria-label="Permalink: 0.0.2.37:" href="#00237"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Javascript changes to support multiple plots on a single page.</li> <li>Added <code>ScatterChart.hide_terms(terms: iter[str])</code> which enables selected terms to be hidden from the chart.</li> <li>Added <code>ScatterChartData.score_transform</code> to specify the function which can change an original score into a value between 0 and 1 used for term coloring.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.36:</h2><a id="user-content-00236" class="anchor" aria-label="Permalink: 0.0.2.36:" href="#00236"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" 
height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Added <code>alternative_term_func</code> to <code>produce_scattertext_explorer</code> which allows you to inject a function that activates when a term is clicked.</li> <li>Fixed Cohen's d calculation, and added <code>HedgesG</code>, an unbiased version of Cohen's d, which is a subclass of <code>CohensD</code>.</li> <li>Added the <code>frequency_transform</code> parameter to <code>produce_frequency_explorer</code>. 
This defaults to a log transform, but allows you to use any way your heart desires to order terms along the x-axis.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.35:</h2><a id="user-content-00235" class="anchor" aria-label="Permalink: 0.0.2.35:" href="#00235"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>Added <code>show_category_headings=True</code> to <code>produce_scattertext_explorer</code>. Setting this to False suppresses the list of categories which will be displayed in the term context area.</li> <li>Added <code>div_name</code> argument to <code>produce_scattertext_explorer</code> and name-spaced important divs and classes by <code>div_name</code> in HTML templates and Javascript.</li> <li>Added <code>show_cross_axes=True</code> to <code>produce_scattertext_explorer</code>. 
Setting this to <code>False</code> prevents the cross axes from being displayed if <code>show_axes</code> is <code>True</code>.</li> <li>Changed default scorer to RankDifference.</li> <li>Made sure that term contexts were properly shown in all configurations.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.34:</h2><a id="user-content-00234" class="anchor" aria-label="Permalink: 0.0.2.34:" href="#00234"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li><code>TermDocMatrix.get_metadata_freq_df</code> now accepts the <code>label_append</code> argument which by default adds <code>' freq'</code> to the end of each column.</li> <li><code>TermDocMatrix.get_num_categories</code> returns the number of categories in a term-document matrix.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.33:</h2><a id="user-content-00233" class="anchor" aria-label="Permalink: 0.0.2.33:" href="#00233"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 
1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added the following methods:</p> <ul dir="auto"> <li><code>TermDocMatrixWithoutCategories.get_num_metadata</code></li> <li><code>TermDocMatrix.use_metadata_as_categories</code></li> <li><code>unified_context</code> argument in <code>produce_scattertext_explorer</code> lists all contexts in a single column. This lets you see snippets organized by multiple categories in a single column. See <code>demo_unified_context.py</code> for an example.<br> This helps category-free or multi-category analyses.</li> </ul> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.2.32</h2><a id="user-content-00232" class="anchor" aria-label="Permalink: 0.0.2.32" href="#00232"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added a series of objects to handle uncategorized corpora. Added section on <a href="#document-based-scatterplots">Document-Based Scatterplots</a>, and the add_doc_names_as_metadata function. 
<code>CategoryColorAssigner</code> was also added to assign colors to qualitative categories.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">0.0.28-31</h2><a id="user-content-0028-31" class="anchor" aria-label="Permalink: 0.0.28-31" href="#0028-31"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">A number of new term scoring approaches including <code>RelativeEntropy</code> (a direct implementation of Frankhauser et al. (2014)), and <code>ZScores</code>, an implementation of the Z-Score model used in Frankhauser et al.</p> <p dir="auto"><code>TermDocMatrix.get_metadata_freq_df()</code> returns a metadata-doc corpus.</p> <p dir="auto"><code>CorpusBasedTermScorer.set_ranker</code> allows you to use a different term ranker when finding corpus-based scores. This not only lets these scorers work with metadata, but also allows you to integrate once-per-document counts.</p> <p dir="auto">Fixed <code>produce_projection_explorer</code> such that it can work with a predefined set of term embeddings. 
This can allow, for example, the easy exploration of one hot-encoded term embeddings in addition to arbitrary lower-dimensional embeddings.</p> <p dir="auto">Added <code>add_metadata</code> to <code>TermDocMatrix</code> in order to inject meta data after a TermDocMatrix object has been created.</p> <p dir="auto">Made sure tooltip never started above the top of the web page.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.28</h3><a id="user-content-00228" class="anchor" aria-label="Permalink: 0.0.2.28" href="#00228"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added <code>DomainCompactor</code>.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.26-27.1</h3><a id="user-content-00226-271" class="anchor" aria-label="Permalink: 0.0.2.26-27.1" href="#00226-271"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 
1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed bug <a href="https://github.com/JasonKessler/scattertext/issues/31" data-hovercard-type="issue" data-hovercard-url="/JasonKessler/scattertext/issues/31/hovercard">#31</a>, enabling context to show when metadata value is clicked.</p> <p dir="auto">Enabled display of terms in topic models in explorer, along with the display of customized topic models. Please see <a href="#visualizing-topic-models">Visualizing topic models</a> for an overview of the additions.</p> <p dir="auto">Removed pkg_resources from Phrasemachine, corrected demo_phrase_machine.py</p> <p dir="auto">Now compatible with Gensim 3.4.0.</p> <p dir="auto">Added characteristic explorer, <code>produce_characteristic_explorer</code>, to plot terms with their characteristic scores on the x-axis and their class-association scores on the y-axis. 
See <a href="#ordering-terms-by-corpus-characteristicness">Ordering Terms by Corpus Characteristicness</a> for more details.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.24-25</h3><a id="user-content-00224-25" class="anchor" aria-label="Permalink: 0.0.2.24-25" href="#00224-25"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added <code>TermCategoryFrequencies</code> in response to Issue 23. Please see <a href="#visualizing-differences-based-on-only-term-frequencies">Visualizing differences based on only term frequencies</a> for more details.</p> <p dir="auto">Added <code>x_axis_labels</code> and <code>y_axis_labels</code> parameters to <code>produce_scattertext_explorer</code>. These let you include evenly-spaced string axis labels on the chart, as opposed to just "Low", "Medium" and "High". These rely on d3's ticks function, which can behave unpredictably. 
Caveat usor.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.16-23.1</h3><a id="user-content-00216-231" class="anchor" aria-label="Permalink: 0.0.2.16-23.1" href="#00216-231"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Semiotic Squares now look better, and have customizable labels.</p> <p dir="auto">Incorporated the <a href="http://www.wjh.harvard.edu/~inquirer/homecat.htm" rel="nofollow">General Inquirer</a> lexicon. For non-commercial use only. The lexicon is downloaded from their homepage at the start of each use. See <code>demo_general_inquierer.py</code>.</p> <p dir="auto">Incorporated Phrasemachine from <a href="https://github.com/AbeHandler">AbeHandler</a> (Handler et al. 2016). For the license, please see <code>PhraseMachineLicense.txt</code>. For an example, please see <code>demo_phrase_machine.py</code>.</p> <p dir="auto">Added <code>CompactTerms</code> for removing redundant and infrequent terms from term document matrices. These occur if a word or phrase is always part of a larger phrase; the shorter phrase is considered redundant and removed from the corpus. See <code>demo_phrase_machine.py</code> for an example.</p> <p dir="auto">Added <code>FourSquare</code>, a pattern that allows for the creation of a semiotic square with separate categories for each corner. 
Please see <code>demo_four_square.py</code> for an early example.</p> <p dir="auto">Finally, added a way to easily perform T-SNE-style visualizations on a categorized corpus. This uses, by default, the <a href="https://github.com/lmcinnes/umap">umap-learn</a> package. Please see demo_tsne_style.py.</p> <p dir="auto">Fixed to <code>ScaledFScorePresets(one_to_neg_one=True)</code>, added <code>UnigramsFromSpacyDoc</code>.</p> <p dir="auto">Now, when using <code>CorpusFromPandas</code>, a <code>CorpusDF</code> object is returned, instead of a <code>Corpus</code> object. This new type of object keeps a reference to the source data frame, and returns it via the <code>CorpusDF.get_df()</code> method.</p> <p dir="auto">The factory <code>CorpusFromFeatureDict</code> was added. It allows you to directly specify term counts and metadata item counts within the dataframe. Please see <code>test_corpusFromFeatureDict.py</code> for an example.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.15-16</h3><a id="user-content-00215-16" class="anchor" aria-label="Permalink: 0.0.2.15-16" href="#00215-16"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added a very semiotic square creator.</p> <p dir="auto">The idea is to build a semiotic square that contrasts two categories in a Term Document Matrix while using other 
categories as neutral categories.</p> <p dir="auto">See <a href="#creating-semiotic-squares">Creating semiotic squares</a> for an overview on how to use this functionality and semiotic squares.</p> <p dir="auto">Added a parameter to disable the display of the top-terms sidebar, e.g., <code>produce_scattertext_explorer(..., show_top_terms=False, ...)</code>.</p> <p dir="auto">An interface to part of the subjectivity/sentiment dataset from Bo Pang and Lillian Lee. ``A Sentimental Education: Sentiment Analysis Using Subjectivity Summarization Based on Minimum Cuts''. ACL. 2004. See <code>SampleCorpora.RottenTomatoes</code>.</p> <p dir="auto">Fixed bug that caused tooltip placement to be off after scrolling.</p> <p dir="auto">Made <code>category_name</code> and <code>not_category_name</code> optional in <code>produce_scattertext_explorer</code> etc.</p> <p dir="auto">Created the ability to customize tooltips via the <code>get_tooltip_content</code> argument to <code>produce_scattertext_explorer</code> etc., control axes labels via <code>x_axis_values</code> and <code>y_axis_values</code>. The <code>color_func</code> parameter is a Javascript function to control color of a point. 
Function takes a parameter which is a dictionary entry produced by <code>ScatterChartExplorer.to_dict</code> and returns a string.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.14</h3><a id="user-content-00214" class="anchor" aria-label="Permalink: 0.0.2.14" href="#00214"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Integration with Scikit-Learn's text-analysis pipeline led to the creation of the <code>CorpusFromScikit</code> and <code>TermDocMatrixFromScikit</code> classes.</p> <p dir="auto">Added the <code>AutoTermSelector</code> class to automatically suggest terms to appear in the visualization.<br> This can make it easier to show large data sets, and remove fiddling with the various minimum term frequency parameters.</p> <p dir="auto">For an example of how to use <code>CorpusFromScikit</code> and <code>AutoTermSelector</code>, please see <code>demo_sklearn.py</code></p> <p dir="auto">Also, I updated the library and examples to be compatible with spaCy 2.</p> <p dir="auto">Fixed bug when processing single-word documents, and set the default beta to 2.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.11-13</h3><a id="user-content-00211-13" class="anchor" aria-label="Permalink: 0.0.2.11-13" href="#00211-13"><svg class="octicon 
octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added <code>produce_frequency_explorer</code> function, and added the PEP 396-compliant <code>__version__</code> attribute as mentioned in <a href="https://github.com/JasonKessler/scattertext/issues/19" data-hovercard-type="issue" data-hovercard-url="/JasonKessler/scattertext/issues/19/hovercard">#19</a>. Fixed bug when creating visualizations with more than two possible categories. Now, by default, category names will not be title-cased in the visualization, but will retain their original case.<br> If you'd still like to do this, use <code>ScatterChart (or a descendant).to_dict(..., title_case_names=True)</code>. 
Fixed <code>DocsAndLabelsFromCorpus</code> for Py 2 compatibility.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.10</h3><a id="user-content-00210" class="anchor" aria-label="Permalink: 0.0.2.10" href="#00210"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed bugs in <code>chinese_nlp</code> when jieba has already been imported and in p-value computation when performing log-odds-ratio w/ prior scoring.</p> <p dir="auto">Added demo for performing a Monroe et 
al. (2008)-style visualization of log-odds-ratio scores in <code>demo_log_odds_ratio_prior.py</code>.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.9.*</h3><a id="user-content-0029" class="anchor" aria-label="Permalink: 0.0.2.9.*" href="#0029"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Breaking change: <code>pmi_filter_thresold</code> has been replaced with <code>pmi_threshold_coefficient</code>.</p> <p dir="auto">Added Emoji and Tweet analysis. 
See <a href="#emoji-analysis">Emoji analysis</a>.</p> <p dir="auto">Characteristic terms fall back to "Most frequent" if no terms used in the chart are present in the background corpus.</p> <p dir="auto">Fixed top-term calculation for custom scores.</p> <p dir="auto">Set scaled f-score's default beta to 0.5.</p> <p dir="auto">Added <code>--spacy_language_model</code> argument to the CLI.</p> <p dir="auto">Added the <code>alternative_text_field</code> option in <code>produce_scattertext_explorer</code> to show an alternative text field when showing contexts in the interactive HTML visualization.</p> <p dir="auto">Updated <code>ParsedCorpus.get_unigram_corpus</code> to allow for continued <code>alternative_text_field</code> functionality.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.8.6</h3><a id="user-content-00286" class="anchor" aria-label="Permalink: 0.0.2.8.6" href="#00286"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added the ability for Scattertext to use noun chunks instead of unigrams and bigrams through the <code>FeatsFromSpacyDocOnlyNounChunks</code> class. 
In order to use it, run your favorite <code>Corpus</code> or <code>TermDocMatrix</code> factory, and pass in an instance of the class as a parameter:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="st.CorpusFromParsedDocuments(..., feats_from_spacy_doc=st.FeatsFromSpacyDocOnlyNounChunks())"><pre class="notranslate"><code>st.CorpusFromParsedDocuments(..., feats_from_spacy_doc=st.FeatsFromSpacyDocOnlyNounChunks()) </code></pre></div> <p dir="auto">Fixed a bug in corpus construction that occurs when the last document has no features.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.8.5</h3><a id="user-content-00285" class="anchor" aria-label="Permalink: 0.0.2.8.5" href="#00285"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Now you don't have to install tinysegmenter to use Scattertext. But you need to install it if you want to parse Japanese. 
This caused a problem when Scattertext was being installed on Windows.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.8.1-4</h3><a id="user-content-00281-4" class="anchor" aria-label="Permalink: 0.0.2.8.1-4" href="#00281-4"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added <code>TermDocMatrix.get_corner_score</code>, giving an improved version of the Rudder Score. Exposed <code>whitespace_nlp_with_sentences</code>. It's a lightweight bad regex sentence splitter built atop a bad regex tokenizer that somewhat apes spaCy's API. 
Use it if you don't have spaCy and the English model downloaded or if you care more about memory footprint and speed than accuracy.</p> <p dir="auto">It's not compatible with <code>word_similarity_explorer</code> but is compatible with <code>word_similarity_explorer_gensim</code>.</p> <p dir="auto">Tweaked scaled f-score normalization.</p> <p dir="auto">Fixed Javascript bug when clicking on '$'.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.8.0</h3><a id="user-content-00280" class="anchor" aria-label="Permalink: 0.0.2.8.0" href="#00280"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed bug in Scaled F-Score computations, and changed computation to better score words that are inversely correlated to category.</p> <p dir="auto">Added <code>Word2VecFromParsedCorpus</code> to automate training Gensim word vectors from a corpus, and<br> <code>word_similarity_explorer_gensim</code> to produce the visualization.</p> <p dir="auto">See <code>demo_gensim_similarity.py</code> for an example.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.7.1</h3><a id="user-content-00271" class="anchor" aria-label="Permalink: 0.0.2.7.1" href="#00271"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path 
d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added the <code>d3_url</code> and <code>d3_scale_chromatic_url</code> parameters to <code>produce_scattertext_explorer</code>. This provides a way to manually specify the paths to "d3.js" (i.e., the file from "<a href="https://cdnjs.cloudflare.com/ajax/libs/d3/4.6.0/d3.min.js" rel="nofollow">https://cdnjs.cloudflare.com/ajax/libs/d3/4.6.0/d3.min.js</a>") and "d3-scale-chromatic.v1.js" (i.e., the file from "<a href="https://d3js.org/d3-scale-chromatic.v1.min.js" rel="nofollow">https://d3js.org/d3-scale-chromatic.v1.min.js</a>").</p> <p dir="auto">This is important if you're getting the error:</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="Javascript error adding output! TypeError: d3.scaleLinear is not a function See your browser Javascript console for more details."><pre class="notranslate"><code>Javascript error adding output! TypeError: d3.scaleLinear is not a function See your browser Javascript console for more details. </code></pre></div> <p dir="auto">It also lets you use Scattertext if you're serving in an environment with no (or a restricted) external Internet connection.</p> <p dir="auto">For example, if "d3.min.js" and "d3-scale-chromatic.v1.min.js" were present in the current working directory, calling the following code would reference them locally instead of the remote Javascript files. 
See <a href="#visualizing-term-associations">Visualizing term associations</a> for code context.</p> <div class="snippet-clipboard-content notranslate position-relative overflow-auto" data-snippet-clipboard-copy-content="&gt;&gt;&gt; html = st.produce_scattertext_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... width_in_pixels=1000, ... metadata=convention_df['speaker'], ... d3_url='d3.min.js', ... d3_scale_chromatic_url='d3-scale-chromatic.v1.min.js')"><pre lang="pydocstring" class="notranslate"><code>&gt;&gt;&gt; html = st.produce_scattertext_explorer(corpus, ... category='democrat', ... category_name='Democratic', ... not_category_name='Republican', ... width_in_pixels=1000, ... metadata=convention_df['speaker'], ... d3_url='d3.min.js', ... d3_scale_chromatic_url='d3-scale-chromatic.v1.min.js') </code></pre></div> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.7.0</h3><a id="user-content-00270" class="anchor" aria-label="Permalink: 0.0.2.7.0" href="#00270"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed a bug in 0.0.2.6.0 that transposed default axis labels.</p> <p dir="auto">Added a Japanese mode to Scattertext. See <code>demo_japanese.py</code> for an example of how to use Japanese. 
Please run <code>pip install tinysegmenter</code> to parse Japanese.</p> <p dir="auto">Also, the <code>chiense_mode</code> boolean parameter in <code>produce_scattertext_explorer</code> has been renamed to <code>asian_mode</code>.</p> <p dir="auto">For example, the output of <code>demo_japanese.py</code> is: <a href="https://jasonkessler.github.io/demo_japanese.html" rel="nofollow"><img src="https://camo.githubusercontent.com/716b080413e9cf82b732f14a19e79a886dc1a47b4fc5f7a8a0619590a4a76707/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f64656d6f5f6a6170616e6573652e706e67" alt="demo_japanese.html" data-canonical-src="https://jasonkessler.github.io/demo_japanese.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.6.0</h3><a id="user-content-00260-1" class="anchor" aria-label="Permalink: 0.0.2.6.0" href="#00260-1"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Custom term positions and axis labels. Although not recommended, you can visualize different metrics on each axis in visualizations similar to Monroe et al. (2008). 
Please see <a href="#custom-term-positions">Custom term positions</a> for more info.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.5.0</h3><a id="user-content-00250" class="anchor" aria-label="Permalink: 0.0.2.5.0" href="#00250"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Enhanced the visualization of query-based categorical differences, a.k.a the <code>word_similarity_explorer</code> function. When run, a plot is produced that contains category associated terms colored in either red or blue hues, and terms not associated with either class colored in greyscale and slightly smaller. The intensity of each color indicates association with the query term. 
For example:</p> <p dir="auto"><a href="https://jasonkessler.github.io/Convention-Visualization-Jobs.html" rel="nofollow"><img src="https://camo.githubusercontent.com/fddd4924eacbf55f3b6f43c1eef3f7a67732e2c45e78ad1915acba29b3b369ae/68747470733a2f2f6a61736f6e6b6573736c65722e6769746875622e696f2f436f6e76656e74696f6e732d56697a2d4578706c616e6174696f6e2e706e67" alt="Convention-Visualization-Jobs.html" data-canonical-src="https://jasonkessler.github.io/Conventions-Viz-Explanation.png" style="max-width: 100%;"></a></p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.4.6</h3><a id="user-content-00246" class="anchor" aria-label="Permalink: 0.0.2.4.6" href="#00246"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Some minor bug fixes, and added a <code>minimum_not_category_term_frequency</code> parameter. This fixes a problem with visualizing imbalanced datasets. 
It sets a minimum number of times a word that does not appear in the target category must appear before it is displayed.</p> <p dir="auto">Added <code>TermDocMatrix.remove_entity_tags</code> method to remove entity type tags from the analysis.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.4.5</h3><a id="user-content-00245" class="anchor" aria-label="Permalink: 0.0.2.4.5" href="#00245"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed matched snippet not displaying issue <a href="/JasonKessler/scattertext/issues/9">#9</a>, and fixed a Python 2 issue in creating a visualization using a <code>ParsedCorpus</code> prepared via <code>CorpusFromParsedDocuments</code>, mentioned in the latter part of the issue <a href="/JasonKessler/scattertext/issues/8">#8</a> discussion.</p> <p dir="auto">Again, Python 2 is supported in experimental mode only.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.4.4</h3><a id="user-content-00244-1" class="anchor" aria-label="Permalink: 0.0.2.4.4" href="#00244-1"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 
1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Corrected example links on this Readme.</p> <p dir="auto">Fixed a bug in Issue 8 where the HTML visualization produced by <code>produce_scattertext_html</code> would fail.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.4.2</h3><a id="user-content-00242-1" class="anchor" aria-label="Permalink: 0.0.2.4.2" href="#00242-1"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed a couple issues that rendered Scattertext broken in Python 2. 
Chinese processing still does not work.</p> <p dir="auto">Note: Use Python 3.4+ if you can.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.4.1</h3><a id="user-content-00241-1" class="anchor" aria-label="Permalink: 0.0.2.4.1" href="#00241-1"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Fixed links in Readme, and made regex NLP available in CLI.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.4</h3><a id="user-content-0024" class="anchor" aria-label="Permalink: 0.0.2.4" href="#0024"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added the command line tool, and fixed a bug related to Empath visualizations.</p> 
<div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.3</h3><a id="user-content-0023" class="anchor" aria-label="Permalink: 0.0.2.3" href="#0023"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Ability to see how a particular term is discussed differently between categories through the <code>word_similarity_explorer</code> function.</p> <p dir="auto">Specialized mode to view sparse term scores.</p> <p dir="auto">Fixed a bug that was caused by repeated values in background unigram counts.</p> <p dir="auto">Added true alphabetical term sorting in visualizations.</p> <p dir="auto">Added an optional save-as-SVG button.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.2</h3><a id="user-content-0022" class="anchor" aria-label="Permalink: 0.0.2.2" href="#0022"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 
1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Additional option of showing characteristic terms (from the full set of documents) being considered. The option (<code>show_characteristic</code> in <code>produce_scattertext_explorer</code>) is on by default, but currently unavailable for Chinese. If you know of a good Chinese wordcount list, please let me know. The algorithm used to produce these is F-Score.<br> See <a href="http://www.slideshare.net/JasonKessler/turning-unstructured-content-into-kernels-of-ideas/58" rel="nofollow">this and the following slide</a> for more details</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.1.5</h3><a id="user-content-00215" class="anchor" aria-label="Permalink: 0.0.2.1.5" href="#00215"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added document and word count statistics to main visualization.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.1.4</h3><a id="user-content-00214-1" class="anchor" aria-label="Permalink: 0.0.2.1.4" href="#00214-1"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path 
d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Added preliminary support for visualizing <a href="https://github.com/Ejhfast/empath-client">Empath</a> (Fast 2016) topics categories instead of emotions. See the tutorial for more information.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.1.3</h3><a id="user-content-00213" class="anchor" aria-label="Permalink: 0.0.2.1.3" href="#00213"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Improved term-labeling.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.1.1</h3><a id="user-content-00211" class="anchor" aria-label="Permalink: 0.0.2.1.1" href="#00211"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 
1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">Addition of <code>strip_final_period</code> param to <code>FeatsFromSpacyDoc</code> to deal with spaCy tokenization of all-caps documents that can leave periods at the end of terms.</p> <div class="markdown-heading" dir="auto"><h3 tabindex="-1" class="heading-element" dir="auto">0.0.2.1.0</h3><a id="user-content-00210-1" class="anchor" aria-label="Permalink: 0.0.2.1.0" href="#00210-1"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <p dir="auto">I've added support for Chinese, including the ChineseNLP class, which uses a RegExp-based sentence splitter and <a href="https://github.com/fxsjy/jieba">Jieba</a> for word segmentation. To use it, see the <code>demo_chinese.py</code> file. 
Note that <code>CorpusFromPandas</code> currently does not support ChineseNLP.</p> <p dir="auto">In order for the visualization to work, set the <code>asian_mode</code> flag to <code>True</code> in <code>produce_scattertext_explorer</code>.</p> <div class="markdown-heading" dir="auto"><h2 tabindex="-1" class="heading-element" dir="auto">Sources</h2><a id="user-content-sources" class="anchor" aria-label="Permalink: Sources" href="#sources"><svg class="octicon octicon-link" viewBox="0 0 16 16" version="1.1" width="16" height="16" aria-hidden="true"><path d="m7.775 3.275 1.25-1.25a3.5 3.5 0 1 1 4.95 4.95l-2.5 2.5a3.5 3.5 0 0 1-4.95 0 .751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018 1.998 1.998 0 0 0 2.83 0l2.5-2.5a2.002 2.002 0 0 0-2.83-2.83l-1.25 1.25a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042Zm-4.69 9.64a1.998 1.998 0 0 0 2.83 0l1.25-1.25a.751.751 0 0 1 1.042.018.751.751 0 0 1 .018 1.042l-1.25 1.25a3.5 3.5 0 1 1-4.95-4.95l2.5-2.5a3.5 3.5 0 0 1 4.95 0 .751.751 0 0 1-.018 1.042.751.751 0 0 1-1.042.018 1.998 1.998 0 0 0-2.83 0l-2.5 2.5a1.998 1.998 0 0 0 0 2.83Z"></path></svg></a></div> <ul dir="auto"> <li>2012 Convention Data: scraped from <a href="http://www.nytimes.com/interactive/2012/09/06/us/politics/convention-word-counts.html?_r=0" rel="nofollow">The New York Times.</a></li> <li>count_1w: Peter Norvig assembled this file (downloaded from <a href="http://norvig.com/ngrams/count_1w.txt" rel="nofollow">norvig.com</a>). See <a href="http://norvig.com/ngrams/" rel="nofollow">http://norvig.com/ngrams/</a> for an explanation of how it was gathered from a very large corpus.</li> <li>hamlet.txt: William Shakespeare. From <a href="http://shakespeare.mit.edu/hamlet/full.html" rel="nofollow">shakespeare.mit.edu</a></li> <li>Inspiration for text scatter plots: Rudder, Christian. Dataclysm: Who We Are (When We Think No One's Looking). Random House Incorporated, 2014.</li> <li>Loncaric, Calvin. "Cozy: synthesizing collection data structures." 
Proceedings of the 2016 24th ACM SIGSOFT International Symposium on Foundations of Software Engineering. ACM, 2016.</li> <li>Fast, Ethan, Binbin Chen, and Michael S. Bernstein. "Empath: Understanding topic signals in large-scale text." Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems. ACM, 2016.</li> <li>Burt L. Monroe, Michael P. Colaresi, and Kevin M. Quinn. 2008. Fightin’ words: Lexical feature selection and evaluation for identifying the content of political conflict. Political Analysis.</li> <li>Bo Pang and Lillian Lee. A Sentimental Education: Sentiment Analysis Using Subjectivity Summarization Based on Minimum Cuts, Proceedings of the ACL, 2004.</li> <li>Abram Handler, Matt Denny, Hanna Wallach, and Brendan O'Connor. Bag of what? Simple noun phrase extraction for corpus analysis. NLP+CSS Workshop at EMNLP 2016.</li> <li>Peter Fankhauser, Jörg Knappen, Elke Teich. Exploring and visualizing variation in language resources. LREC 2014.</li> <li>Shinichi Nakagawa and Innes C. Cuthill. Effect size, confidence interval and statistical significance: a practical guide for biologists. 2007. In Biological Reviews 82.</li> <li>Cynthia M. Whissell. The dictionary of affect in language. 1993. In The Measurement of Emotions.</li> <li>David Bamman, Jacob Eisenstein, and Tyler Schnoebelen. GENDER IDENTITY AND LEXICAL VARIATION IN SOCIAL MEDIA. 2014.</li> <li>Rada Mihalcea, Paul Tarau. TextRank: Bringing Order into Text. EMNLP. 2004.</li> <li>Frimer, J. A., Boghrati, R., Haidt, J., Graham, J., &amp; Dehghani, M. Moral Foundations Dictionary for Linguistic Analyses 2.0. Unpublished manuscript. 2019.</li> <li>Jesse Graham, Jonathan Haidt, Sena Koleva, Matt Motyl, Ravi Iyer, Sean P Wojcik, and Peter H Ditto. 2013. Moral foundations theory: The pragmatic validity of moral pluralism. Advances in Experimental Social Psychology, 47, 55-130</li> <li>Ryan J. Gallagher, Morgan R. Frank, Lewis Mitchell, Aaron J. Schwartz, Andrew J. Reagan, Christopher M. 
Danforth, and Peter Sheridan Dodds. Generalized Word Shift Graphs: A Method for Visualizing and Explaining Pairwise Comparisons Between Texts. 2020. Arxiv. <a href="https://arxiv.org/pdf/2008.02250.pdf" rel="nofollow">https://arxiv.org/pdf/2008.02250.pdf</a></li> <li>Kocoń, Jan; Zaśko-Zielińska, Monika and Miłkowski, Piotr, 2019, PolEmo 2.0 Sentiment Analysis Dataset for CoNLL, CLARIN-PL digital repository, <a href="http://hdl.handle.net/11321/710" rel="nofollow">http://hdl.handle.net/11321/710</a>.</li> <li>George Forman. 2008. BNS feature scaling: an improved representation over tf-idf for svm text classification. In Proceedings of the 17th ACM conference on Information and knowledge management (CIKM '08). Association for Computing Machinery, New York, NY, USA, 263–270. <a href="https://doi.org/10.1145/1458082.1458119" rel="nofollow">https://doi.org/10.1145/1458082.1458119</a></li> <li>Anne-Kathrin Schumann. 2016. Brave new world: Uncovering topical dynamics in the ACL Anthology reference corpus using term life cycle information. In Proceedings of the 10th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities, pages 1–11, Berlin, Germany. Association for Computational Linguistics.</li> <li>Piao, S. S., Bianchi, F., Dayrell, C., D’egidio, A., &amp; Rayson, P. 2015. Development of the multilingual semantic annotation system. In Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (pp. 1268-1274).</li> <li>Cliff, N. (1993). Dominance statistics: Ordinal analyses to answer ordinal questions. Psychological Bulletin, 114(3), 494–509. <a href="https://doi.org/10.1037/0033-2909.114.3.494" rel="nofollow">https://doi.org/10.1037/0033-2909.114.3.494</a></li> <li>Altmann EG, Pierrehumbert JB, Motter AE (2011) Niche as a Determinant of Word Fate in Online Groups. PLoS ONE 6(5): e19009. 
<a href="https://doi.org/10.1371/journal.pone.0019009" rel="nofollow">https://doi.org/10.1371/journal.pone.0019009</a>.</li> </ul> </article></div></div></div></div></div> <!-- --> <!-- --> <script type="application/json" id="__PRIMER_DATA_:R0:__">{"resolvedServerColorMode":"day"}</script></div> </react-partial> <input type="hidden" data-csrf="true" value="WbMIq7rbolwCuV8vY9CRm2ZZzohUe5OA7AkDgOVGIa3H2iRq6ZNgq+Px7ZK9FrxRVBR2lPN98TAgXSYlgg1T/A==" /> </div> <div data-view-component="true" class="Layout-sidebar"> <div class="BorderGrid about-margin" data-pjax> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <div class="hide-sm hide-md"> <h2 class="mb-3 h4">About</h2> <p class="f4 my-3"> Beautiful visualizations of how language differs among document types. </p> <h3 class="sr-only">Topics</h3> <div class="my-3"> <div class="f6"> <a href="/topics/visualization" title="Topic: visualization" data-view-component="true" class="topic-tag topic-tag-link"> visualization </a> <a href="/topics/d3" title="Topic: d3" data-view-component="true" class="topic-tag topic-tag-link"> d3 </a> <a href="/topics/nlp" title="Topic: nlp" data-view-component="true" class="topic-tag topic-tag-link"> nlp </a> <a href="/topics/machine-learning" title="Topic: machine-learning" data-view-component="true" class="topic-tag topic-tag-link"> machine-learning </a> <a href="/topics/natural-language-processing" title="Topic: natural-language-processing" data-view-component="true" class="topic-tag topic-tag-link"> natural-language-processing </a> <a href="/topics/text-mining" title="Topic: text-mining" data-view-component="true" class="topic-tag topic-tag-link"> text-mining </a> <a href="/topics/word2vec" title="Topic: word2vec" data-view-component="true" class="topic-tag topic-tag-link"> word2vec </a> <a href="/topics/exploratory-data-analysis" title="Topic: exploratory-data-analysis" data-view-component="true" class="topic-tag topic-tag-link"> exploratory-data-analysis </a> <a 
href="/topics/word-embeddings" title="Topic: word-embeddings" data-view-component="true" class="topic-tag topic-tag-link"> word-embeddings </a> <a href="/topics/sentiment" title="Topic: sentiment" data-view-component="true" class="topic-tag topic-tag-link"> sentiment </a> <a href="/topics/eda" title="Topic: eda" data-view-component="true" class="topic-tag topic-tag-link"> eda </a> <a href="/topics/topic-modeling" title="Topic: topic-modeling" data-view-component="true" class="topic-tag topic-tag-link"> topic-modeling </a> <a href="/topics/scatter-plot" title="Topic: scatter-plot" data-view-component="true" class="topic-tag topic-tag-link"> scatter-plot </a> <a href="/topics/japanese-language" title="Topic: japanese-language" data-view-component="true" class="topic-tag topic-tag-link"> japanese-language </a> <a href="/topics/stylometry" title="Topic: stylometry" data-view-component="true" class="topic-tag topic-tag-link"> stylometry </a> <a href="/topics/computational-social-science" title="Topic: computational-social-science" data-view-component="true" class="topic-tag topic-tag-link"> computational-social-science </a> <a href="/topics/text-visualization" title="Topic: text-visualization" data-view-component="true" class="topic-tag topic-tag-link"> text-visualization </a> <a href="/topics/text-as-data" title="Topic: text-as-data" data-view-component="true" class="topic-tag topic-tag-link"> text-as-data </a> <a href="/topics/stylometric" title="Topic: stylometric" data-view-component="true" class="topic-tag topic-tag-link"> stylometric </a> <a href="/topics/semiotic-squares" title="Topic: semiotic-squares" data-view-component="true" class="topic-tag topic-tag-link"> semiotic-squares </a> </div> </div> <h3 class="sr-only">Resources</h3> <div class="mt-2"> <a class="Link--muted" data-analytics-event="{&quot;category&quot;:&quot;Repository Overview&quot;,&quot;action&quot;:&quot;click&quot;,&quot;label&quot;:&quot;location:sidebar;file:readme&quot;}" 
href="#readme-ov-file"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-book mr-2"> <path d="M0 1.75A.75.75 0 0 1 .75 1h4.253c1.227 0 2.317.59 3 1.501A3.743 3.743 0 0 1 11.006 1h4.245a.75.75 0 0 1 .75.75v10.5a.75.75 0 0 1-.75.75h-4.507a2.25 2.25 0 0 0-1.591.659l-.622.621a.75.75 0 0 1-1.06 0l-.622-.621A2.25 2.25 0 0 0 5.258 13H.75a.75.75 0 0 1-.75-.75Zm7.251 10.324.004-5.073-.002-2.253A2.25 2.25 0 0 0 5.003 2.5H1.5v9h3.757a3.75 3.75 0 0 1 1.994.574ZM8.755 4.75l-.004 7.322a3.752 3.752 0 0 1 1.992-.572H14.5v-9h-3.495a2.25 2.25 0 0 0-2.25 2.25Z"></path> </svg> Readme </a> </div> <h3 class="sr-only">License</h3> <div class="mt-2"> <a href="#Apache-2.0-1-ov-file" class="Link--muted" data-analytics-event="{&quot;category&quot;:&quot;Repository Overview&quot;,&quot;action&quot;:&quot;click&quot;,&quot;label&quot;:&quot;location:sidebar;file:license&quot;}" > <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-law mr-2"> <path d="M8.75.75V2h.985c.304 0 .603.08.867.231l1.29.736c.038.022.08.033.124.033h2.234a.75.75 0 0 1 0 1.5h-.427l2.111 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.006.005-.01.01-.045.04c-.21.176-.441.327-.686.45C14.556 10.78 13.88 11 13 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L12.178 4.5h-.162c-.305 0-.604-.079-.868-.231l-1.29-.736a.245.245 0 0 0-.124-.033H8.75V13h2.5a.75.75 0 0 1 0 1.5h-6.5a.75.75 0 0 1 0-1.5h2.5V3.5h-.984a.245.245 0 0 0-.124.033l-1.289.737c-.265.15-.564.23-.869.23h-.162l2.112 4.692a.75.75 0 0 1-.154.838l-.53-.53.529.531-.001.002-.002.002-.006.006-.016.015-.045.04c-.21.176-.441.327-.686.45C4.556 10.78 3.88 11 3 11a4.498 4.498 0 0 1-2.023-.454 3.544 3.544 0 0 1-.686-.45l-.045-.04-.016-.015-.006-.006-.004-.004v-.001a.75.75 0 0 1-.154-.838L2.178 4.5H1.75a.75.75 0 0 1 
0-1.5h2.234a.249.249 0 0 0 .125-.033l1.288-.737c.265-.15.564-.23.869-.23h.984V.75a.75.75 0 0 1 1.5 0Zm2.945 8.477c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L13 6.327Zm-10 0c.285.135.718.273 1.305.273s1.02-.138 1.305-.273L3 6.327Z"></path> </svg> Apache-2.0 license </a> </div> <include-fragment src="/JasonKessler/scattertext/hovercards/citation/sidebar_partial?tree_name=master"> </include-fragment> <div class="mt-2"> <a href="/JasonKessler/scattertext/activity" data-view-component="true" class="Link Link--muted"><svg text="gray" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-pulse mr-2"> <path d="M6 2c.306 0 .582.187.696.471L10 10.731l1.304-3.26A.751.751 0 0 1 12 7h3.25a.75.75 0 0 1 0 1.5h-2.742l-1.812 4.528a.751.751 0 0 1-1.392 0L6 4.77 4.696 8.03A.75.75 0 0 1 4 8.5H.75a.75.75 0 0 1 0-1.5h2.742l1.812-4.529A.751.751 0 0 1 6 2Z"></path> </svg> <span class="color-fg-muted">Activity</span></a> </div> <h3 class="sr-only">Stars</h3> <div class="mt-2"> <a href="/JasonKessler/scattertext/stargazers" data-view-component="true" class="Link Link--muted"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-star mr-2"> <path d="M8 .25a.75.75 0 0 1 .673.418l1.882 3.815 4.21.612a.75.75 0 0 1 .416 1.279l-3.046 2.97.719 4.192a.751.751 0 0 1-1.088.791L8 12.347l-3.766 1.98a.75.75 0 0 1-1.088-.79l.72-4.194L.818 6.374a.75.75 0 0 1 .416-1.28l4.21-.611L7.327.668A.75.75 0 0 1 8 .25Zm0 2.445L6.615 5.5a.75.75 0 0 1-.564.41l-3.097.45 2.24 2.184a.75.75 0 0 1 .216.664l-.528 3.084 2.769-1.456a.75.75 0 0 1 .698 0l2.77 1.456-.53-3.084a.75.75 0 0 1 .216-.664l2.24-2.183-3.096-.45a.75.75 0 0 1-.564-.41L8 2.694Z"></path> </svg> <strong>2.3k</strong> stars</a> </div> <h3 class="sr-only">Watchers</h3> <div class="mt-2"> <a href="/JasonKessler/scattertext/watchers" data-view-component="true" class="Link Link--muted"><svg aria-hidden="true" 
height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-eye mr-2"> <path d="M8 2c1.981 0 3.671.992 4.933 2.078 1.27 1.091 2.187 2.345 2.637 3.023a1.62 1.62 0 0 1 0 1.798c-.45.678-1.367 1.932-2.637 3.023C11.67 13.008 9.981 14 8 14c-1.981 0-3.671-.992-4.933-2.078C1.797 10.83.88 9.576.43 8.898a1.62 1.62 0 0 1 0-1.798c.45-.677 1.367-1.931 2.637-3.022C4.33 2.992 6.019 2 8 2ZM1.679 7.932a.12.12 0 0 0 0 .136c.411.622 1.241 1.75 2.366 2.717C5.176 11.758 6.527 12.5 8 12.5c1.473 0 2.825-.742 3.955-1.715 1.124-.967 1.954-2.096 2.366-2.717a.12.12 0 0 0 0-.136c-.412-.621-1.242-1.75-2.366-2.717C10.824 4.242 9.473 3.5 8 3.5c-1.473 0-2.825.742-3.955 1.715-1.124.967-1.954 2.096-2.366 2.717ZM8 10a2 2 0 1 1-.001-3.999A2 2 0 0 1 8 10Z"></path> </svg> <strong>54</strong> watching</a> </div> <h3 class="sr-only">Forks</h3> <div class="mt-2"> <a href="/JasonKessler/scattertext/forks" data-view-component="true" class="Link Link--muted"><svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-repo-forked mr-2"> <path d="M5 5.372v.878c0 .414.336.75.75.75h4.5a.75.75 0 0 0 .75-.75v-.878a2.25 2.25 0 1 1 1.5 0v.878a2.25 2.25 0 0 1-2.25 2.25h-1.5v2.128a2.251 2.251 0 1 1-1.5 0V8.5h-1.5A2.25 2.25 0 0 1 3.5 6.25v-.878a2.25 2.25 0 1 1 1.5 0ZM5 3.25a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Zm6.75.75a.75.75 0 1 0 0-1.5.75.75 0 0 0 0 1.5Zm-3 8.75a.75.75 0 1 0-1.5 0 .75.75 0 0 0 1.5 0Z"></path> </svg> <strong>292</strong> forks</a> </div> <div class="mt-2"> <a class="Link--muted" href="/contact/report-content?content_url=https%3A%2F%2Fgithub.com%2FJasonKessler%2Fscattertext&amp;report=JasonKessler+%28user%29"> Report repository </a> </div> </div> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame"> <a href="/JasonKessler/scattertext/releases" 
data-view-component="true" class="Link--primary no-underline Link">Releases <span title="1" data-view-component="true" class="Counter">1</span></a></h2> <a class="Link--primary d-flex no-underline" data-pjax="#repo-content-pjax-container" data-turbo-frame="repo-content-turbo-frame" href="/JasonKessler/scattertext/releases/tag/0.0.2.4.4"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-tag flex-shrink-0 mt-1 color-fg-success"> <path d="M1 7.775V2.75C1 1.784 1.784 1 2.75 1h5.025c.464 0 .91.184 1.238.513l6.25 6.25a1.75 1.75 0 0 1 0 2.474l-5.026 5.026a1.75 1.75 0 0 1-2.474 0l-6.25-6.25A1.752 1.752 0 0 1 1 7.775Zm1.5 0c0 .066.026.13.073.177l6.25 6.25a.25.25 0 0 0 .354 0l5.025-5.025a.25.25 0 0 0 0-.354l-6.25-6.25a.25.25 0 0 0-.177-.073H2.75a.25.25 0 0 0-.25.25ZM6 5a1 1 0 1 1 0 2 1 1 0 0 1 0-2Z"></path> </svg> <div class="ml-2 min-width-0"> <div class="d-flex"> <span class="css-truncate css-truncate-target text-bold mr-2" style="max-width: none;">0.0.2.4.4</span> <span title="Label: Latest" data-view-component="true" class="Label Label--success flex-shrink-0"> Latest </span> </div> <div class="text-small color-fg-muted"><relative-time datetime="2017-03-13T05:31:21Z" class="no-wrap">Mar 13, 2017</relative-time></div> </div> </a> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3"> <a href="/users/JasonKessler/packages?repo_name=scattertext" data-view-component="true" class="Link--primary no-underline Link d-flex flex-items-center">Packages <span title="0" hidden="hidden" data-view-component="true" class="Counter ml-1">0</span></a></h2> <div class="text-small color-fg-muted" > No packages published <br> </div> </div> </div> <div class="BorderGrid-row" > <div class="BorderGrid-cell"> <h2 class="h4 mb-3"> <a href="/JasonKessler/scattertext/network/dependents" data-view-component="true" class="Link--primary no-underline Link">Used by <span title="657" 
data-view-component="true" class="Counter">657</span></a> </h2> <a class="d-flex flex-items-center" href="/JasonKessler/scattertext/network/dependents"> <ul class="hx_flex-avatar-stack list-style-none min-width-0"> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/71118482?s=64&amp;v=4" width="32" height="32" alt="@ionanicleoid" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/97718430?s=64&amp;v=4" width="32" height="32" alt="@LinearPL" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/100832921?s=64&amp;v=4" width="32" height="32" alt="@tonioeltopoquegira" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/158843669?s=64&amp;v=4" width="32" height="32" alt="@nobleyam" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/123460784?s=64&amp;v=4" width="32" height="32" alt="@Klepats" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/11177998?s=64&amp;v=4" width="32" height="32" alt="@aflueckiger" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/102739786?s=64&amp;v=4" width="32" height="32" alt="@mmtomczak" /> </li> <li class="hx_flex-avatar-stack-item"> <img class="avatar avatar-user" src="https://avatars.githubusercontent.com/u/68419391?s=64&amp;v=4" width="32" height="32" alt="@Haniff-Toha" /> </li> </ul> <span class="px-2 text-bold text-small no-wrap"> + 649 </span> </a> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3"> <a href="/JasonKessler/scattertext/graphs/contributors" data-view-component="true" 
class="Link--primary no-underline Link d-flex flex-items-center">Contributors <span title="12" data-view-component="true" class="Counter ml-1">12</span></a></h2> <ul class="list-style-none d-flex flex-wrap mb-n2"> <li class="mb-2 mr-2" > <a href="https://github.com/JasonKessler" class="" data-hovercard-type="user" data-hovercard-url="/users/JasonKessler/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/312924?s=64&amp;v=4" alt="@JasonKessler" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/andreasvc" class="" data-hovercard-type="user" data-hovercard-url="/users/andreasvc/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/261460?s=64&amp;v=4" alt="@andreasvc" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/chbrown" class="" data-hovercard-type="user" data-hovercard-url="/users/chbrown/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/360279?s=64&amp;v=4" alt="@chbrown" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/synapticarbors" class="" data-hovercard-type="user" data-hovercard-url="/users/synapticarbors/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/589279?s=64&amp;v=4" alt="@synapticarbors" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/tigerneil" class="" data-hovercard-type="user" 
data-hovercard-url="/users/tigerneil/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/5799436?s=64&amp;v=4" alt="@tigerneil" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/Anthonyive" class="" data-hovercard-type="user" data-hovercard-url="/users/Anthonyive/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/8257285?s=64&amp;v=4" alt="@Anthonyive" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/millengustavo" class="" data-hovercard-type="user" data-hovercard-url="/users/millengustavo/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/20524003?s=64&amp;v=4" alt="@millengustavo" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/David-Herman" class="" data-hovercard-type="user" data-hovercard-url="/users/David-Herman/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/22305345?s=64&amp;v=4" alt="@David-Herman" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/siddu1998" class="" data-hovercard-type="user" data-hovercard-url="/users/siddu1998/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/24701478?s=64&amp;v=4" alt="@siddu1998" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 
mr-2" > <a href="https://github.com/shettyprithvi" class="" data-hovercard-type="user" data-hovercard-url="/users/shettyprithvi/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/36528857?s=64&amp;v=4" alt="@shettyprithvi" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/ckjellson" class="" data-hovercard-type="user" data-hovercard-url="/users/ckjellson/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/37980849?s=64&amp;v=4" alt="@ckjellson" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> <li class="mb-2 mr-2" > <a href="https://github.com/mastafaMicrosoft" class="" data-hovercard-type="user" data-hovercard-url="/users/mastafaMicrosoft/hovercard" data-octo-click="hovercard-link-click" data-octo-dimensions="link_type:self" > <img src="https://avatars.githubusercontent.com/u/68865108?s=64&amp;v=4" alt="@mastafaMicrosoft" size="32" height="32" width="32" data-view-component="true" class="avatar circle" /> </a> </li> </ul> </div> </div> <div class="BorderGrid-row"> <div class="BorderGrid-cell"> <h2 class="h4 mb-3">Languages</h2> <div class="mb-2"> <span data-view-component="true" class="Progress"> <span style="background-color:#3572A5 !important;;width: 64.8%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#f1e05a !important;;width: 32.7%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> <span style="background-color:#e34c26 !important;;width: 2.5%;" itemprop="keywords" data-view-component="true" class="Progress-item color-bg-success-emphasis"></span> </span></div> <ul class="list-style-none"> <li 
class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/JasonKessler/scattertext/search?l=python" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#3572A5;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">Python</span> <span>64.8%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/JasonKessler/scattertext/search?l=javascript" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#f1e05a;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">JavaScript</span> <span>32.7%</span> </a> </li> <li class="d-inline"> <a class="d-inline-flex flex-items-center flex-nowrap Link--secondary no-underline text-small mr-3" href="/JasonKessler/scattertext/search?l=html" data-ga-click="Repository, language stats search click, location:repo overview"> <svg style="color:#e34c26;" aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-dot-fill mr-2"> <path d="M8 4a4 4 0 1 1 0 8 4 4 0 0 1 0-8Z"></path> </svg> <span class="color-fg-default text-bold mr-1">HTML</span> <span>2.5%</span> </a> </li> </ul> </div> </div> </div> </div> </div></div> </div> </div> </turbo-frame> </main> </div> </div> <footer class="footer pt-8 pb-6 f6 color-fg-muted p-responsive" role="contentinfo" > <h2 class='sr-only'>Footer</h2> <div class="d-flex flex-justify-center flex-items-center 
flex-column-reverse flex-lg-row flex-wrap flex-lg-nowrap"> <div class="d-flex flex-items-center flex-shrink-0 mx-2"> <a aria-label="Homepage" title="GitHub" class="footer-octicon mr-2" href="https://github.com"> <svg aria-hidden="true" height="24" viewBox="0 0 24 24" version="1.1" width="24" data-view-component="true" class="octicon octicon-mark-github"> <path d="M12 1C5.9225 1 1 5.9225 1 12C1 16.8675 4.14875 20.9787 8.52125 22.4362C9.07125 22.5325 9.2775 22.2025 9.2775 21.9137C9.2775 21.6525 9.26375 20.7862 9.26375 19.865C6.5 20.3737 5.785 19.1912 5.565 18.5725C5.44125 18.2562 4.905 17.28 4.4375 17.0187C4.0525 16.8125 3.5025 16.3037 4.42375 16.29C5.29 16.2762 5.90875 17.0875 6.115 17.4175C7.105 19.0812 8.68625 18.6137 9.31875 18.325C9.415 17.61 9.70375 17.1287 10.02 16.8537C7.5725 16.5787 5.015 15.63 5.015 11.4225C5.015 10.2262 5.44125 9.23625 6.1425 8.46625C6.0325 8.19125 5.6475 7.06375 6.2525 5.55125C6.2525 5.55125 7.17375 5.2625 9.2775 6.67875C10.1575 6.43125 11.0925 6.3075 12.0275 6.3075C12.9625 6.3075 13.8975 6.43125 14.7775 6.67875C16.8813 5.24875 17.8025 5.55125 17.8025 5.55125C18.4075 7.06375 18.0225 8.19125 17.9125 8.46625C18.6138 9.23625 19.04 10.2125 19.04 11.4225C19.04 15.6437 16.4688 16.5787 14.0213 16.8537C14.42 17.1975 14.7638 17.8575 14.7638 18.8887C14.7638 20.36 14.75 21.5425 14.75 21.9137C14.75 22.2025 14.9563 22.5462 15.5063 22.4362C19.8513 20.9787 23 16.8537 23 12C23 5.9225 18.0775 1 12 1Z"></path> </svg> </a> <span> &copy; 2025 GitHub,&nbsp;Inc. 
</span> </div> <nav aria-label="Footer"> <h3 class="sr-only" id="sr-footer-heading">Footer navigation</h3> <ul class="list-style-none d-flex flex-justify-center flex-wrap mb-2 mb-lg-0" aria-labelledby="sr-footer-heading"> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to Terms&quot;,&quot;label&quot;:&quot;text:terms&quot;}" href="https://docs.github.com/site-policy/github-terms/github-terms-of-service" data-view-component="true" class="Link--secondary Link">Terms</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to privacy&quot;,&quot;label&quot;:&quot;text:privacy&quot;}" href="https://docs.github.com/site-policy/privacy-policies/github-privacy-statement" data-view-component="true" class="Link--secondary Link">Privacy</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to security&quot;,&quot;label&quot;:&quot;text:security&quot;}" href="https://github.com/security" data-view-component="true" class="Link--secondary Link">Security</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to status&quot;,&quot;label&quot;:&quot;text:status&quot;}" href="https://www.githubstatus.com/" data-view-component="true" class="Link--secondary Link">Status</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to docs&quot;,&quot;label&quot;:&quot;text:docs&quot;}" href="https://docs.github.com/" data-view-component="true" class="Link--secondary Link">Docs</a> </li> <li class="mx-2"> <a data-analytics-event="{&quot;category&quot;:&quot;Footer&quot;,&quot;action&quot;:&quot;go to contact&quot;,&quot;label&quot;:&quot;text:contact&quot;}" href="https://support.github.com?tags=dotcom-footer" data-view-component="true" class="Link--secondary 
Link">Contact</a> </li> <li class="mx-2" > <cookie-consent-link> <button type="button" class="Link--secondary underline-on-hover border-0 p-0 color-bg-transparent" data-action="click:cookie-consent-link#showConsentManagement" data-analytics-event="{&quot;location&quot;:&quot;footer&quot;,&quot;action&quot;:&quot;cookies&quot;,&quot;context&quot;:&quot;subfooter&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;cookies_link_subfooter_footer&quot;}" > Manage cookies </button> </cookie-consent-link> </li> <li class="mx-2"> <cookie-consent-link> <button type="button" class="Link--secondary underline-on-hover border-0 p-0 color-bg-transparent" data-action="click:cookie-consent-link#showConsentManagement" data-analytics-event="{&quot;location&quot;:&quot;footer&quot;,&quot;action&quot;:&quot;dont_share_info&quot;,&quot;context&quot;:&quot;subfooter&quot;,&quot;tag&quot;:&quot;link&quot;,&quot;label&quot;:&quot;dont_share_info_link_subfooter_footer&quot;}" > Do not share my personal information </button> </cookie-consent-link> </li> </ul> </nav> </div> </footer> <ghcc-consent id="ghcc" class="position-fixed bottom-0 left-0" style="z-index: 999999" data-initial-cookie-consent-allowed="" data-cookie-consent-required="false"></ghcc-consent> <div id="ajax-error-message" class="ajax-error-message flash flash-error" hidden> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-alert"> <path d="M6.457 1.047c.659-1.234 2.427-1.234 3.086 0l6.082 11.378A1.75 1.75 0 0 1 14.082 15H1.918a1.75 1.75 0 0 1-1.543-2.575Zm1.763.707a.25.25 0 0 0-.44 0L1.698 13.132a.25.25 0 0 0 .22.368h12.164a.25.25 0 0 0 .22-.368Zm.53 3.996v2.5a.75.75 0 0 1-1.5 0v-2.5a.75.75 0 0 1 1.5 0ZM9 11a1 1 0 1 1-2 0 1 1 0 0 1 2 0Z"></path> </svg> <button type="button" class="flash-close js-ajax-error-dismiss" aria-label="Dismiss error"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" 
data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> You can’t perform that action at this time. </div> <template id="site-details-dialog"> <details class="details-reset details-overlay details-overlay-dark lh-default color-fg-default hx_rsm" open> <summary role="button" aria-label="Close dialog"></summary> <details-dialog class="Box Box--overlay d-flex flex-column anim-fade-in fast hx_rsm-dialog hx_rsm-modal"> <button class="Box-btn-octicon m-0 btn-octicon position-absolute right-0 top-0" type="button" aria-label="Close dialog" data-close-dialog> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-x"> <path d="M3.72 3.72a.75.75 0 0 1 1.06 0L8 6.94l3.22-3.22a.749.749 0 0 1 1.275.326.749.749 0 0 1-.215.734L9.06 8l3.22 3.22a.749.749 0 0 1-.326 1.275.749.749 0 0 1-.734-.215L8 9.06l-3.22 3.22a.751.751 0 0 1-1.042-.018.751.751 0 0 1-.018-1.042L6.94 8 3.72 4.78a.75.75 0 0 1 0-1.06Z"></path> </svg> </button> <div class="octocat-spinner my-6 js-details-dialog-spinner"></div> </details-dialog> </details> </template> <div class="Popover js-hovercard-content position-absolute" style="display: none; outline: none;"> <div class="Popover-message Popover-message--bottom-left Popover-message--large Box color-shadow-large" style="width:360px;"> </div> </div> <template id="snippet-clipboard-copy-button"> <div class="zeroclipboard-container position-absolute right-0 top-0"> <clipboard-copy aria-label="Copy" class="ClipboardButton btn js-clipboard-copy m-2 p-0" data-copy-feedback="Copied!" 
data-tooltip-direction="w"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copy js-clipboard-copy-icon m-2"> <path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"></path><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-check js-clipboard-check-icon color-fg-success d-none m-2"> <path d="M13.78 4.22a.75.75 0 0 1 0 1.06l-7.25 7.25a.75.75 0 0 1-1.06 0L2.22 9.28a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L6 10.94l6.72-6.72a.75.75 0 0 1 1.06 0Z"></path> </svg> </clipboard-copy> </div> </template> <template id="snippet-clipboard-copy-button-unpositioned"> <div class="zeroclipboard-container"> <clipboard-copy aria-label="Copy" class="ClipboardButton btn btn-invisible js-clipboard-copy m-2 p-0 d-flex flex-justify-center flex-items-center" data-copy-feedback="Copied!" 
data-tooltip-direction="w"> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-copy js-clipboard-copy-icon"> <path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"></path><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"></path> </svg> <svg aria-hidden="true" height="16" viewBox="0 0 16 16" version="1.1" width="16" data-view-component="true" class="octicon octicon-check js-clipboard-check-icon color-fg-success d-none"> <path d="M13.78 4.22a.75.75 0 0 1 0 1.06l-7.25 7.25a.75.75 0 0 1-1.06 0L2.22 9.28a.751.751 0 0 1 .018-1.042.751.751 0 0 1 1.042-.018L6 10.94l6.72-6.72a.75.75 0 0 1 1.06 0Z"></path> </svg> </clipboard-copy> </div> </template> </div> <div id="js-global-screen-reader-notice" class="sr-only mt-n1" aria-live="polite" aria-atomic="true" ></div> <div id="js-global-screen-reader-notice-assertive" class="sr-only mt-n1" aria-live="assertive" aria-atomic="true"></div> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10