CINXE.COM

Document classification - Wikipedia

<!DOCTYPE html> <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr"> <head> <meta charset="UTF-8"> <title>Document classification - Wikipedia</title> <script>(function(){var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}document.documentElement.className=className;}());RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy", "wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"278be658-41c9-4884-8c0f-fb830079f9c6","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Document_classification","wgTitle":"Document classification","wgCurRevisionId":1222174881,"wgRevisionId":1222174881,"wgArticleId":1331441,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Webarchive template wayback links","CS1 maint: location missing publisher","Articles with short description","Short description is different from Wikidata","Data mining","Information science","Knowledge representation","Machine learning","Natural language processing"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Document_classification","wgRelevantArticleId":1331441,"wgIsProbablyEditable":true, "wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":10000,"wgRelatedArticlesCompat":[],"wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q302088","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics": true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};RLPAGEMODULES=["ext.cite.ux-enhancements","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","ext.popups", "ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];</script> <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=ext.cite.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&amp;only=styles&amp;skin=vector-2022"> <script async="" src="/w/load.php?lang=en&amp;modules=startup&amp;only=scripts&amp;raw=1&amp;skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&amp;modules=site.styles&amp;only=styles&amp;skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Document classification - Wikipedia"> <meta property="og:type" content="website"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Document_classification"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Document_classification&amp;action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Document_classification"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Document_classification rootpage-Document_classification skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page&#039;s font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=C13_en.wikipedia.org&amp;uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&amp;returnto=Document+classification" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&amp;returnto=Document+classification" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=C13_en.wikipedia.org&amp;uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&amp;returnto=Document+classification" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Document+classification" title="You&#039;re encouraged to log in; however, it&#039;s not mandatory. [o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-&quot;Content-based&quot;_versus_&quot;request-based&quot;_classification" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#&quot;Content-based&quot;_versus_&quot;request-based&quot;_classification"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>"Content-based" versus "request-based" classification</span> </div> </a> <ul id="toc-&quot;Content-based&quot;_versus_&quot;request-based&quot;_classification-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Classification_versus_indexing" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Classification_versus_indexing"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Classification versus indexing</span> </div> </a> <ul id="toc-Classification_versus_indexing-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Automatic_document_classification_(ADC)" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Automatic_document_classification_(ADC)"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Automatic document classification (ADC)</span> </div> </a> <button aria-controls="toc-Automatic_document_classification_(ADC)-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Automatic document classification (ADC) subsection</span> </button> <ul id="toc-Automatic_document_classification_(ADC)-sublist" class="vector-toc-list"> <li id="toc-Techniques" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Techniques"> <div class="vector-toc-text"> <span class="vector-toc-numb">3.1</span> <span>Techniques</span> </div> </a> <ul id="toc-Techniques-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Applications" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Applications"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Applications</span> </div> </a> <ul id="toc-Applications-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Further_reading" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Further_reading"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>Further reading</span> </div> </a> <ul id="toc-Further_reading-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Document classification</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. Available in 19 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-19" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">19 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Classificaci%C3%B3_de_documents" title="Classificació de documents – Catalan" lang="ca" hreflang="ca" data-title="Classificació de documents" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/Clasificaci%C3%B3n_de_documentos" title="Clasificación de documentos – Spanish" lang="es" hreflang="es" data-title="Clasificación de documentos" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-eu mw-list-item"><a href="https://eu.wikipedia.org/wiki/Dokumentuen_sailkapena" title="Dokumentuen sailkapena – Basque" lang="eu" hreflang="eu" data-title="Dokumentuen sailkapena" data-language-autonym="Euskara" data-language-local-name="Basque" class="interlanguage-link-target"><span>Euskara</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D8%AF%D8%B3%D8%AA%D9%87%E2%80%8C%D8%A8%D9%86%D8%AF%DB%8C_%D8%A7%D8%B3%D9%86%D8%A7%D8%AF" title="دسته‌بندی اسناد – Persian" lang="fa" hreflang="fa" data-title="دسته‌بندی اسناد" data-language-autonym="فارسی" data-language-local-name="Persian" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/Classification_et_cat%C3%A9gorisation_de_documents" title="Classification et catégorisation de documents – French" lang="fr" hreflang="fr" data-title="Classification et catégorisation de documents" data-language-autonym="Français" data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/%EB%AC%B8%EC%84%9C_%EB%B6%84%EB%A5%98" title="문서 분류 – Korean" lang="ko" hreflang="ko" data-title="문서 분류" data-language-autonym="한국어" data-language-local-name="Korean" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-id mw-list-item"><a href="https://id.wikipedia.org/wiki/Klasifikasi_dokumen" title="Klasifikasi dokumen – Indonesian" lang="id" hreflang="id" data-title="Klasifikasi dokumen" data-language-autonym="Bahasa Indonesia" data-language-local-name="Indonesian" class="interlanguage-link-target"><span>Bahasa Indonesia</span></a></li><li class="interlanguage-link interwiki-it mw-list-item"><a href="https://it.wikipedia.org/wiki/Text_categorization" title="Text categorization – Italian" lang="it" hreflang="it" data-title="Text categorization" data-language-autonym="Italiano" data-language-local-name="Italian" class="interlanguage-link-target"><span>Italiano</span></a></li><li class="interlanguage-link interwiki-nl mw-list-item"><a href="https://nl.wikipedia.org/wiki/Tekstclassificatie" title="Tekstclassificatie – Dutch" lang="nl" hreflang="nl" data-title="Tekstclassificatie" data-language-autonym="Nederlands" data-language-local-name="Dutch" class="interlanguage-link-target"><span>Nederlands</span></a></li><li class="interlanguage-link interwiki-ja mw-list-item"><a href="https://ja.wikipedia.org/wiki/%E6%96%87%E6%9B%B8%E5%88%86%E9%A1%9E" title="文書分類 – Japanese" lang="ja" hreflang="ja" data-title="文書分類" data-language-autonym="日本語" data-language-local-name="Japanese" class="interlanguage-link-target"><span>日本語</span></a></li><li class="interlanguage-link interwiki-no mw-list-item"><a href="https://no.wikipedia.org/wiki/Tekstklassifisering" title="Tekstklassifisering – Norwegian Bokmål" lang="nb" hreflang="nb" data-title="Tekstklassifisering" data-language-autonym="Norsk bokmål" data-language-local-name="Norwegian Bokmål" class="interlanguage-link-target"><span>Norsk bokmål</span></a></li><li class="interlanguage-link interwiki-nn mw-list-item"><a href="https://nn.wikipedia.org/wiki/Dokumentklassifisering" title="Dokumentklassifisering – Norwegian Nynorsk" lang="nn" hreflang="nn" data-title="Dokumentklassifisering" data-language-autonym="Norsk nynorsk" data-language-local-name="Norwegian Nynorsk" class="interlanguage-link-target"><span>Norsk nynorsk</span></a></li><li class="interlanguage-link interwiki-ru mw-list-item"><a href="https://ru.wikipedia.org/wiki/%D0%9A%D0%BB%D0%B0%D1%81%D1%81%D0%B8%D1%84%D0%B8%D0%BA%D0%B0%D1%86%D0%B8%D1%8F_%D0%B4%D0%BE%D0%BA%D1%83%D0%BC%D0%B5%D0%BD%D1%82%D0%BE%D0%B2" title="Классификация документов – Russian" lang="ru" hreflang="ru" data-title="Классификация документов" data-language-autonym="Русский" data-language-local-name="Russian" class="interlanguage-link-target"><span>Русский</span></a></li><li class="interlanguage-link interwiki-su mw-list-item"><a href="https://su.wikipedia.org/wiki/Klasifikasi_dokum%C3%A9n" title="Klasifikasi dokumén – Sundanese" lang="su" hreflang="su" data-title="Klasifikasi dokumén" data-language-autonym="Sunda" data-language-local-name="Sundanese" class="interlanguage-link-target"><span>Sunda</span></a></li><li class="interlanguage-link interwiki-fi mw-list-item"><a href="https://fi.wikipedia.org/wiki/Dokumenttien_luokittelu" title="Dokumenttien luokittelu – Finnish" lang="fi" hreflang="fi" data-title="Dokumenttien luokittelu" data-language-autonym="Suomi" data-language-local-name="Finnish" class="interlanguage-link-target"><span>Suomi</span></a></li><li class="interlanguage-link interwiki-tr mw-list-item"><a href="https://tr.wikipedia.org/wiki/Belge_s%C4%B1n%C4%B1fland%C4%B1rma" title="Belge sınıflandırma – Turkish" lang="tr" hreflang="tr" data-title="Belge sınıflandırma" data-language-autonym="Türkçe" data-language-local-name="Turkish" class="interlanguage-link-target"><span>Türkçe</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/%D0%9A%D0%BB%D0%B0%D1%81%D0%B8%D1%84%D1%96%D0%BA%D0%B0%D1%86%D1%96%D1%8F_%D0%B4%D0%BE%D0%BA%D1%83%D0%BC%D0%B5%D0%BD%D1%82%D1%96%D0%B2" title="Класифікація документів – Ukrainian" lang="uk" hreflang="uk" data-title="Класифікація документів" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E6%96%87%E6%AA%94%E5%88%86%E9%A1%9E" title="文檔分類 – Cantonese" lang="yue" hreflang="yue" data-title="文檔分類" data-language-autonym="粵語" data-language-local-name="Cantonese" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E6%96%87%E6%A1%A3%E5%88%86%E7%B1%BB" title="文档分类 – Chinese" lang="zh" hreflang="zh" data-title="文档分类" data-language-autonym="中文" data-language-local-name="Chinese" class="interlanguage-link-target"><span>中文</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q302088#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Document_classification" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Document_classification" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Document_classification"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Document_classification&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Document_classification&amp;action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Document_classification"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Document_classification&amp;action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Document_classification&amp;action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Document_classification" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Document_classification" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Document_classification&amp;oldid=1222174881" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Document_classification&amp;action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&amp;page=Document_classification&amp;id=1222174881&amp;wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FDocument_classification"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&amp;url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FDocument_classification"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=Document_classification&amp;action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Document_classification&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q302088" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Process of categorizing documents</div> <p><b>Document classification</b> or <b>document categorization</b> is a problem in <a href="/wiki/Library_science" class="mw-redirect" title="Library science">library science</a>, <a href="/wiki/Information_science" title="Information science">information science</a> and <a href="/wiki/Computer_science" title="Computer science">computer science</a>. The task is to assign a <a href="/wiki/Document" title="Document">document</a> to one or more <a href="/wiki/Class_(philosophy)" title="Class (philosophy)">classes</a> or <a href="/wiki/Categorization" class="mw-redirect" title="Categorization">categories</a>. This may be done "manually" (or "intellectually") or <a href="/wiki/Algorithmically" class="mw-redirect" title="Algorithmically">algorithmically</a>. The intellectual classification of documents has mostly been the province of library science, while the algorithmic classification of documents is mainly in information science and computer science. The problems are overlapping, however, and there is therefore interdisciplinary research on document classification. </p><p>The documents to be classified may be texts, images, music, etc. Each kind of document possesses its special classification problems. When not otherwise specified, <b>text classification</b> is implied. </p><p>Documents may be classified according to their <a href="/wiki/Subject_(documents)" title="Subject (documents)">subjects</a> or according to other attributes (such as document type, author, printing year etc.). In the rest of this article only subject classification is considered. There are two main philosophies of subject classification of documents: the content-based approach and the request-based approach. </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="&quot;Content-based&quot;_versus_&quot;request-based&quot;_classification"><span id=".22Content-based.22_versus_.22request-based.22_classification"></span>"Content-based" versus "request-based" classification</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=1" title="Edit section: &quot;Content-based&quot; versus &quot;request-based&quot; classification"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p><b>Content-based classification</b> is classification in which the weight given to particular subjects in a document determines the class to which the document is assigned. It is, for example, a common rule for classification in libraries, that at least 20% of the content of a book should be about the class to which the book is assigned.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span class="cite-bracket">&#91;</span>1<span class="cite-bracket">&#93;</span></a></sup> In automatic classification it could be the number of times given words appears in a document. </p><p><b>Request-oriented classification</b> (or -indexing) is classification in which the anticipated request from users is influencing how documents are being classified. The classifier asks themself: “Under which descriptors should this entity be found?” and “think of all the possible queries and decide for which ones the entity at hand is relevant” (Soergel, 1985, p.&#160;230<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup>). </p><p>Request-oriented classification may be classification that is targeted towards a particular audience or user group. For example, a library or a database for feminist studies may classify/index documents differently when compared to a historical library. It is probably better, however, to understand request-oriented classification as <i>policy-based classification</i>: The classification is done according to some ideals and reflects the purpose of the library or database doing the classification. In this way it is not necessarily a kind of classification or indexing based on user studies. Only if empirical data about use or users are applied should request-oriented classification be regarded as a user-based approach. </p> <div class="mw-heading mw-heading2"><h2 id="Classification_versus_indexing">Classification versus indexing</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=2" title="Edit section: Classification versus indexing"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Sometimes a distinction is made between assigning documents to classes ("classification") versus assigning <a href="/wiki/Subject_(documents)" title="Subject (documents)">subjects</a> to documents ("<a href="/wiki/Subject_indexing" title="Subject indexing">subject indexing</a>") but as <a href="/wiki/Frederick_Wilfrid_Lancaster" title="Frederick Wilfrid Lancaster">Frederick Wilfrid Lancaster</a> has argued, this distinction is not fruitful. "These terminological distinctions,” he writes, “are quite meaningless and only serve to cause confusion” (Lancaster, 2003, p.&#160;21<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span class="cite-bracket">&#91;</span>3<span class="cite-bracket">&#93;</span></a></sup>). The view that this distinction is purely superficial is also supported by the fact that a classification system may be transformed into a <a href="/wiki/Thesaurus" title="Thesaurus">thesaurus</a> and vice versa (cf., Aitchison, 1986,<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span class="cite-bracket">&#91;</span>4<span class="cite-bracket">&#93;</span></a></sup> 2004;<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">&#91;</span>5<span class="cite-bracket">&#93;</span></a></sup> Broughton, 2008;<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">&#91;</span>6<span class="cite-bracket">&#93;</span></a></sup> Riesthuis &amp; Bliedung, 1991<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span class="cite-bracket">&#91;</span>7<span class="cite-bracket">&#93;</span></a></sup>). Therefore, the act of labeling a document (say by assigning a term from a <a href="/wiki/Controlled_vocabulary" title="Controlled vocabulary">controlled vocabulary</a> to a document) is at the same time to assign that document to the class of documents indexed by that term (all documents indexed or classified as X belong to the same class of documents). In other words, labeling a document is the same as assigning it to the class of documents indexed under that label. </p> <div class="mw-heading mw-heading2"><h2 id="Automatic_document_classification_(ADC)"><span id="Automatic_document_classification_.28ADC.29"></span>Automatic document classification (ADC)</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=3" title="Edit section: Automatic document classification (ADC)"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Automatic document classification tasks can be divided into three sorts: <b>supervised document classification</b> where some external mechanism (such as human feedback) provides information on the correct classification for documents, <b>unsupervised document classification</b> (also known as <a href="/wiki/Document_clustering" title="Document clustering">document clustering</a>), where the classification must be done entirely without reference to external information, and <b>semi-supervised document classification</b>,<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">&#91;</span>8<span class="cite-bracket">&#93;</span></a></sup> where parts of the documents are labeled by the external mechanism. There are several software products under various license models available.<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">&#91;</span>9<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">&#91;</span>10<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-11" class="reference"><a href="#cite_note-11"><span class="cite-bracket">&#91;</span>11<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-12" class="reference"><a href="#cite_note-12"><span class="cite-bracket">&#91;</span>12<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-13" class="reference"><a href="#cite_note-13"><span class="cite-bracket">&#91;</span>13<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-14" class="reference"><a href="#cite_note-14"><span class="cite-bracket">&#91;</span>14<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Techniques">Techniques</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=4" title="Edit section: Techniques"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Automatic document classification techniques include: </p> <ul><li><a href="/wiki/Artificial_neural_network" class="mw-redirect" title="Artificial neural network">Artificial neural network</a></li> <li><a href="/wiki/Concept_Mining" class="mw-redirect" title="Concept Mining">Concept Mining</a></li> <li><a href="/wiki/Decision_tree_learning" title="Decision tree learning">Decision trees</a> such as <a href="/wiki/ID3_algorithm" title="ID3 algorithm">ID3</a> or <a href="/wiki/C4.5_algorithm" title="C4.5 algorithm">C4.5</a></li> <li><a href="/wiki/Expectation_maximization" class="mw-redirect" title="Expectation maximization">Expectation maximization</a> (EM)</li> <li><a href="/wiki/Instantaneously_trained_neural_networks" title="Instantaneously trained neural networks">Instantaneously trained neural networks</a></li> <li><a href="/wiki/Latent_semantic_indexing" class="mw-redirect" title="Latent semantic indexing">Latent semantic indexing</a></li> <li><a href="/wiki/Multiple-instance_learning" class="mw-redirect" title="Multiple-instance learning">Multiple-instance learning</a></li> <li><a href="/wiki/Naive_Bayes_classifier" title="Naive Bayes classifier">Naive Bayes classifier</a></li> <li><a href="/wiki/Natural_language_processing" title="Natural language processing">Natural language processing</a> approaches</li> <li><a href="/wiki/Rough_set" title="Rough set">Rough set</a>-based classifier</li> <li><a href="/wiki/Soft_set" title="Soft set">Soft set</a>-based classifier</li> <li><a href="/wiki/Support_vector_machines" class="mw-redirect" title="Support vector machines">Support vector machines</a> (SVM)</li> <li><a href="/wiki/K-nearest_neighbor_algorithm" class="mw-redirect" title="K-nearest neighbor algorithm">K-nearest neighbour algorithms</a></li> <li><a href="/wiki/Tf%E2%80%93idf" title="Tf–idf">tf–idf</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="Applications">Applications</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=5" title="Edit section: Applications"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Classification techniques have been applied to </p> <ul><li><a href="/wiki/Spam_filter" class="mw-redirect" title="Spam filter">spam filtering</a>, a process which tries to discern <a href="/wiki/E-mail_spam" class="mw-redirect" title="E-mail spam">E-mail spam</a> messages from legitimate emails</li> <li>email <a href="/wiki/Routing" title="Routing">routing</a>, sending an email sent to a general address to a specific address or mailbox depending on topic<sup id="cite_ref-15" class="reference"><a href="#cite_note-15"><span class="cite-bracket">&#91;</span>15<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Language_identification" title="Language identification">language identification</a>, automatically determining the language of a text</li> <li>genre classification, automatically determining the genre of a text<sup id="cite_ref-16" class="reference"><a href="#cite_note-16"><span class="cite-bracket">&#91;</span>16<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/Readability" title="Readability">readability assessment</a>, automatically determining the degree of readability of a text, either to find suitable materials for different age groups or reader types or as part of a larger <a href="/wiki/Text_simplification" title="Text simplification">text simplification</a> system</li> <li><a href="/wiki/Sentiment_analysis" title="Sentiment analysis">sentiment analysis</a>, determining the attitude of a speaker or a writer with respect to some topic or the overall contextual polarity of a document.</li> <li>health-related classification using social media in public health surveillance <sup id="cite_ref-17" class="reference"><a href="#cite_note-17"><span class="cite-bracket">&#91;</span>17<span class="cite-bracket">&#93;</span></a></sup></li> <li>article triage, selecting articles that are relevant for manual literature curation, for example as is being done as the first step to generate manually curated annotation databases in biology <sup id="cite_ref-:0_18-0" class="reference"><a href="#cite_note-:0-18"><span class="cite-bracket">&#91;</span>18<span class="cite-bracket">&#93;</span></a></sup></li></ul> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=6" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1184024115">.mw-parser-output .div-col{margin-top:0.3em;column-width:30em}.mw-parser-output .div-col-small{font-size:90%}.mw-parser-output .div-col-rules{column-rule:1px solid #aaa}.mw-parser-output .div-col dl,.mw-parser-output .div-col ol,.mw-parser-output .div-col ul{margin-top:0}.mw-parser-output .div-col li,.mw-parser-output .div-col dd{page-break-inside:avoid;break-inside:avoid-column}</style><div class="div-col"> <ul><li><a href="/wiki/Categorization" class="mw-redirect" title="Categorization">Categorization</a></li> <li><a href="/wiki/Classification_(disambiguation)" class="mw-redirect" title="Classification (disambiguation)">Classification (disambiguation)</a></li> <li><a href="/wiki/Compound_term_processing" class="mw-redirect" title="Compound term processing">Compound term processing</a></li> <li><a href="/wiki/Concept-based_image_indexing" title="Concept-based image indexing">Concept-based image indexing</a></li> <li><a href="/wiki/Content-based_image_retrieval" title="Content-based image retrieval">Content-based image retrieval</a></li> <li><a href="/wiki/Decimal_section_numbering" class="mw-redirect" title="Decimal section numbering">Decimal section numbering</a></li> <li><a href="/wiki/Document" title="Document">Document</a></li> <li><a href="/wiki/Document_retrieval" title="Document retrieval">Document retrieval</a></li> <li><a href="/wiki/Document_clustering" title="Document clustering">Document clustering</a></li> <li><a href="/wiki/Information_retrieval" title="Information retrieval">Information retrieval</a></li> <li><a href="/wiki/Knowledge_organization" title="Knowledge organization">Knowledge organization</a></li> <li><a href="/wiki/Knowledge_Organization_System" class="mw-redirect" title="Knowledge Organization System">Knowledge Organization System</a></li> <li><a href="/wiki/Library_classification" title="Library classification">Library classification</a></li> <li><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a></li> <li><a href="/wiki/Native_Language_Identification" class="mw-redirect" title="Native Language Identification">Native Language Identification</a></li> <li><a href="/wiki/String_metrics" class="mw-redirect" title="String metrics">String metrics</a></li> <li><a href="/wiki/Subject_(documents)" title="Subject (documents)">Subject (documents)</a></li> <li><a href="/wiki/Subject_indexing" title="Subject indexing">Subject indexing</a></li> <li><a href="/wiki/Supervised_learning" title="Supervised learning">Supervised learning</a>, <a href="/wiki/Unsupervised_learning" title="Unsupervised learning">unsupervised learning</a></li> <li><a href="/wiki/Text_mining" title="Text mining">Text mining</a>, <a href="/wiki/Web_mining" class="mw-redirect" title="Web mining">web mining</a>, <a href="/wiki/Concept_mining" title="Concept mining">concept mining</a></li></ul> </div> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=7" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text">Library of Congress (2008). The subject headings manual. Washington, DC.: Library of Congress, Policy and Standards Division. (Sheet H 180: "Assign headings only for topics that comprise at least 20% of the work.")</span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text">Soergel, Dagobert (1985). <a rel="nofollow" class="external text" href="https://books.google.com/books?id=cHbNCgAAQBAJ">Organizing information: Principles of data base and retrieval systems</a>. Orlando, FL: Academic Press.</span> </li> <li id="cite_note-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-3">^</a></b></span> <span class="reference-text">Lancaster, F. W. (2003). Indexing and abstracting in theory and practice. Library Association, London.</span> </li> <li id="cite_note-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-4">^</a></b></span> <span class="reference-text">Aitchison, J. (1986). "A classification as a source for thesaurus: The Bibliographic Classification of H. E. Bliss as a source of thesaurus terms and structure." Journal of Documentation, Vol. 42 No. 3, pp. 160-181.</span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text">Aitchison, J. (2004). "Thesauri from BC2: Problems and possibilities revealed in an experimental thesaurus derived from the Bliss Music schedule." Bliss Classification Bulletin, Vol. 46, pp. 20-26.</span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text">Broughton, V. (2008). "<a rel="nofollow" class="external text" href="https://link.springer.com/article/10.1007/s10516-007-9027-7">A faceted classification as the basis of a faceted terminology: Conversion of a classified structure to thesaurus format in the Bliss Bibliographic Classification</a> (2nd Ed.).]" Axiomathes, Vol. 18 No.2, pp. 193-210.</span> </li> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text">Riesthuis, G. J. A., &amp; Bliedung, St. (1991). "Thesaurification of the UDC." Tools for knowledge organization and the human interface, Vol. 2, pp. 109-117. Index Verlag, Frankfurt.</span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"> Rossi, R. G., Lopes, A. d. A., and Rezende, S. O. (2016). <a rel="nofollow" class="external text" href="https://www.sciencedirect.com/science/article/pii/S0306457315000990">Optimization and label propagation in bipartite heterogeneous networks to improve transductive classification of texts</a>. Information Processing &amp; Management, 52(2):217–257.</span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20171115082749/https://pdfs.semanticscholar.org/bea4/a204239556a29228decc9e029c326e4900b7.pdf">"An Interactive Automatic Document Classification Prototype"</a> <span class="cs1-format">(PDF)</span>. Archived from <a rel="nofollow" class="external text" href="https://pdfs.semanticscholar.org/bea4/a204239556a29228decc9e029c326e4900b7.pdf">the original</a> <span class="cs1-format">(PDF)</span> on 2017-11-15<span class="reference-accessdate">. Retrieved <span class="nowrap">2017-11-14</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=An+Interactive+Automatic+Document+Classification+Prototype&amp;rft_id=https%3A%2F%2Fpdfs.semanticscholar.org%2Fbea4%2Fa204239556a29228decc9e029c326e4900b7.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ADocument+classification" class="Z3988"></span></span> </li> <li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://seer.lcc.ufmg.br/index.php/jidm/article/download/43/41An">Interactive Automatic Document Classification Prototype</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20150424122349/https://seer.lcc.ufmg.br/index.php/jidm/article/download/43/41An">Archived</a> April 24, 2015, at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a></span> </li> <li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://archive.today/20141208063727/http://www.artsyltech.com/da_classification.htmlAutomatic">Document Classification - Artsyl</a></span> </li> <li id="cite_note-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-12">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.abbyy.com/ocr_sdk_windows/what_is_new/classification/">ABBYY FineReader Engine 11 for Windows</a></span> </li> <li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://www.antidot.net/classifier/">Classifier - Antidot</a></span> </li> <li id="cite_note-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-14">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.bisok.com/grooper-data-capture-method-features/document-classification/">"3 Document Classification Methods for Tough Projects"</a>. <i>www.bisok.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2021-08-04</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=www.bisok.com&amp;rft.atitle=3+Document+Classification+Methods+for+Tough+Projects&amp;rft_id=https%3A%2F%2Fwww.bisok.com%2Fgrooper-data-capture-method-features%2Fdocument-classification%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ADocument+classification" class="Z3988"></span></span> </li> <li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text">Stephan Busemann, Sven Schmeier and Roman G. Arens (2000). <a rel="nofollow" class="external text" href="https://arxiv.org/abs/cs/0003060">Message classification in the call center</a>. In Sergei Nirenburg, Douglas Appelt, Fabio Ciravegna and Robert Dale, eds., Proc. 6th Applied Natural Language Processing Conf. (ANLP'00), pp. 158–165, ACL.</span> </li> <li id="cite_note-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-16">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFSantiniRosso2008" class="citation cs2">Santini, Marina; Rosso, Mark (2008), <a rel="nofollow" class="external text" href="https://web.archive.org/web/20191115061125/https://www.bcs.org/upload/pdf/ewic_fd08_paper7.pdf"><i>Testing a Genre-Enabled Application: A Preliminary Assessment</i></a> <span class="cs1-format">(PDF)</span>, BCS IRSG Symposium: Future Directions in Information Access, London, UK, pp.&#160;54–63, archived from <a rel="nofollow" class="external text" href="http://www.bcs.org/upload/pdf/ewic_fd08_paper7.pdf">the original</a> <span class="cs1-format">(PDF)</span> on 2019-11-15<span class="reference-accessdate">, retrieved <span class="nowrap">2011-10-21</span></span></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Testing+a+Genre-Enabled+Application%3A+A+Preliminary+Assessment&amp;rft.place=London%2C+UK&amp;rft.series=BCS+IRSG+Symposium%3A+Future+Directions+in+Information+Access&amp;rft.pages=54-63&amp;rft.date=2008&amp;rft.aulast=Santini&amp;rft.aufirst=Marina&amp;rft.au=Rosso%2C+Mark&amp;rft_id=http%3A%2F%2Fwww.bcs.org%2Fupload%2Fpdf%2Fewic_fd08_paper7.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ADocument+classification" class="Z3988"></span><span class="cs1-maint citation-comment"><code class="cs1-code">{{<a href="/wiki/Template:Citation" title="Template:Citation">citation</a>}}</code>: CS1 maint: location missing publisher (<a href="/wiki/Category:CS1_maint:_location_missing_publisher" title="Category:CS1 maint: location missing publisher">link</a>)</span></span> </li> <li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text">X. Dai, M. Bikdash and B. Meyer, "From social media to public health surveillance: Word embedding based clustering method for twitter classification," SoutheastCon 2017, Charlotte, NC, 2017, pp. 1-7. <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1109%2FSECON.2017.7925400">10.1109/SECON.2017.7925400</a></span> </li> <li id="cite_note-:0-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-:0_18-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFKrallingerLeitnerRodriguez-PenagosValencia2008" class="citation journal cs1">Krallinger, M; Leitner, F; Rodriguez-Penagos, C; Valencia, A (2008). <a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2559988">"Overview of the protein-protein interaction annotation extraction task of Bio <i>Creative</i> II"</a>. <i>Genome Biology</i>. <b>9</b> (Suppl 2): S4. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.1186%2Fgb-2008-9-s2-s4">10.1186/gb-2008-9-s2-s4</a></span>. <a href="/wiki/PMC_(identifier)" class="mw-redirect" title="PMC (identifier)">PMC</a>&#160;<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2559988">2559988</a></span>. <a href="/wiki/PMID_(identifier)" class="mw-redirect" title="PMID (identifier)">PMID</a>&#160;<a rel="nofollow" class="external text" href="https://pubmed.ncbi.nlm.nih.gov/18834495">18834495</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Genome+Biology&amp;rft.atitle=Overview+of+the+protein-protein+interaction+annotation+extraction+task+of+Bio+Creative+II&amp;rft.volume=9&amp;rft.issue=Suppl+2&amp;rft.pages=S4&amp;rft.date=2008&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC2559988%23id-name%3DPMC&amp;rft_id=info%3Apmid%2F18834495&amp;rft_id=info%3Adoi%2F10.1186%2Fgb-2008-9-s2-s4&amp;rft.aulast=Krallinger&amp;rft.aufirst=M&amp;rft.au=Leitner%2C+F&amp;rft.au=Rodriguez-Penagos%2C+C&amp;rft.au=Valencia%2C+A&amp;rft_id=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC2559988&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ADocument+classification" class="Z3988"></span></span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=8" title="Edit section: Further reading"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li>Fabrizio Sebastiani. <a rel="nofollow" class="external text" href="https://arxiv.org/abs/cs.ir/0110053">Machine learning in automated text categorization</a>. ACM Computing Surveys, 34(1):1–47, 2002.</li> <li>Stefan Büttcher, Charles L. A. Clarke, and Gordon V. Cormack. <a rel="nofollow" class="external text" href="http://www.ir.uwaterloo.ca/book/">Information Retrieval: Implementing and Evaluating Search Engines</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201005195805/http://www.ir.uwaterloo.ca/book/">Archived</a> 2020-10-05 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a>. MIT Press, 2010.</li></ul> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Document_classification&amp;action=edit&amp;section=9" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a rel="nofollow" class="external text" href="https://web.archive.org/web/20070613200617/http://isp.imm.dtu.dk/thor/projects/multimedia/textmining/node11.html">Introduction to document classification</a></li> <li><a rel="nofollow" class="external text" href="https://www.cs.technion.ac.il/~gabr/resources/atc/atcbib.html">Bibliography on Automated Text Categorization</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20190926230122/http://www.cs.technion.ac.il/~gabr/resources/atc/atcbib.html">Archived</a> 2019-09-26 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a></li> <li><a rel="nofollow" class="external text" href="http://liinwww.ira.uka.de/bibliography/Ai/query-classification.html">Bibliography on Query Classification</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20191002205729/http://liinwww.ira.uka.de/bibliography/Ai/query-classification.html">Archived</a> 2019-10-02 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a></li> <li><a rel="nofollow" class="external text" href="http://www.gabormelli.com/RKB/Text_Classification_Task">Text Classification</a> analysis page</li> <li><a rel="nofollow" class="external text" href="http://www.nltk.org/book/ch06.html">Learning to Classify Text - Chap. 6 of the book Natural Language Processing with Python</a> (available online)</li> <li><a rel="nofollow" class="external text" href="http://techtc.cs.technion.ac.il">TechTC - Technion Repository of Text Categorization Datasets</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20200214072558/http://techtc.cs.technion.ac.il/">Archived</a> 2020-02-14 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a></li> <li><a rel="nofollow" class="external text" href="http://www.daviddlewis.com/resources/testcollections/">David D. Lewis's Datasets</a></li> <li><a rel="nofollow" class="external text" href="http://www.biocreative.org/tasks/biocreative-iii/ppi/">BioCreative III ACT (article classification task) dataset</a></li></ul> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="Natural_language_processing" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Natural_language_processing" title="Template:Natural language processing"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Natural_language_processing" title="Template talk:Natural language processing"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Natural_language_processing" title="Special:EditPage/Template:Natural language processing"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Natural_language_processing" style="font-size:114%;margin:0 4em"><a href="/wiki/Natural_language_processing" title="Natural language processing">Natural language processing</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">General terms</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/AI-complete" title="AI-complete">AI-complete</a></li> <li><a href="/wiki/Bag-of-words_model" title="Bag-of-words model">Bag-of-words</a></li> <li><a href="/wiki/N-gram" title="N-gram">n-gram</a> <ul><li><a href="/wiki/Bigram" title="Bigram">Bigram</a></li> <li><a href="/wiki/Trigram" title="Trigram">Trigram</a></li></ul></li> <li><a href="/wiki/Computational_linguistics" title="Computational linguistics">Computational linguistics</a></li> <li><a href="/wiki/Natural_language_understanding" title="Natural language understanding">Natural language understanding</a></li> <li><a href="/wiki/Stop_word" title="Stop word">Stop words</a></li> <li><a href="/wiki/Text_processing" title="Text processing">Text processing</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text_mining" title="Text mining">Text analysis</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Argument_mining" title="Argument mining">Argument mining</a></li> <li><a href="/wiki/Collocation_extraction" title="Collocation extraction">Collocation extraction</a></li> <li><a href="/wiki/Concept_mining" title="Concept mining">Concept mining</a></li> <li><a href="/wiki/Coreference#Coreference_resolution" title="Coreference">Coreference resolution</a></li> <li><a href="/wiki/Deep_linguistic_processing" title="Deep linguistic processing">Deep linguistic processing</a></li> <li><a href="/wiki/Distant_reading" title="Distant reading">Distant reading</a></li> <li><a href="/wiki/Information_extraction" title="Information extraction">Information extraction</a></li> <li><a href="/wiki/Named-entity_recognition" title="Named-entity recognition">Named-entity recognition</a></li> <li><a href="/wiki/Ontology_learning" title="Ontology learning">Ontology learning</a></li> <li><a href="/wiki/Parsing" title="Parsing">Parsing</a> <ul><li><a href="/wiki/Semantic_parsing" title="Semantic parsing">Semantic parsing</a></li> <li><a href="/wiki/Syntactic_parsing_(computational_linguistics)" title="Syntactic parsing (computational linguistics)">Syntactic parsing</a></li></ul></li> <li><a href="/wiki/Part-of-speech_tagging" title="Part-of-speech tagging">Part-of-speech tagging</a></li> <li><a href="/wiki/Semantic_analysis_(machine_learning)" title="Semantic analysis (machine learning)">Semantic analysis</a></li> <li><a href="/wiki/Semantic_role_labeling" title="Semantic role labeling">Semantic role labeling</a></li> <li><a href="/wiki/Semantic_decomposition_(natural_language_processing)" title="Semantic decomposition (natural language processing)">Semantic decomposition</a></li> <li><a href="/wiki/Semantic_similarity" title="Semantic similarity">Semantic similarity</a></li> <li><a href="/wiki/Sentiment_analysis" title="Sentiment analysis">Sentiment analysis</a></li></ul> <ul><li><a href="/wiki/Terminology_extraction" title="Terminology extraction">Terminology extraction</a></li> <li><a href="/wiki/Text_mining" title="Text mining">Text mining</a></li> <li><a href="/wiki/Textual_entailment" title="Textual entailment">Textual entailment</a></li> <li><a href="/wiki/Truecasing" title="Truecasing">Truecasing</a></li> <li><a href="/wiki/Word-sense_disambiguation" title="Word-sense disambiguation">Word-sense disambiguation</a></li> <li><a href="/wiki/Word-sense_induction" title="Word-sense induction">Word-sense induction</a></li></ul> </div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th id="Text_segmentation" scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text_segmentation" title="Text segmentation">Text segmentation</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Compound-term_processing" title="Compound-term processing">Compound-term processing</a></li> <li><a href="/wiki/Lemmatisation" class="mw-redirect" title="Lemmatisation">Lemmatisation</a></li> <li><a href="/wiki/Lexical_analysis" title="Lexical analysis">Lexical analysis</a></li> <li><a href="/wiki/Shallow_parsing" title="Shallow parsing">Text chunking</a></li> <li><a href="/wiki/Stemming" title="Stemming">Stemming</a></li> <li><a href="/wiki/Sentence_boundary_disambiguation" title="Sentence boundary disambiguation">Sentence segmentation</a></li> <li><a href="/wiki/Word#Word_boundaries" title="Word">Word segmentation</a></li></ul> </div></td></tr></tbody></table><div> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Automatic_summarization" title="Automatic summarization">Automatic summarization</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Multi-document_summarization" title="Multi-document summarization">Multi-document summarization</a></li> <li><a href="/wiki/Sentence_extraction" title="Sentence extraction">Sentence extraction</a></li> <li><a href="/wiki/Text_simplification" title="Text simplification">Text simplification</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Machine_translation" title="Machine translation">Machine translation</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Computer-assisted_translation" title="Computer-assisted translation">Computer-assisted</a></li> <li><a href="/wiki/Example-based_machine_translation" title="Example-based machine translation">Example-based</a></li> <li><a href="/wiki/Rule-based_machine_translation" title="Rule-based machine translation">Rule-based</a></li> <li><a href="/wiki/Statistical_machine_translation" title="Statistical machine translation">Statistical</a></li> <li><a href="/wiki/Transfer-based_machine_translation" title="Transfer-based machine translation">Transfer-based</a></li> <li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">Neural</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Distributional_semantics" title="Distributional semantics">Distributional semantics</a> models</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li> <li><a href="/wiki/Document-term_matrix" title="Document-term matrix">Document-term matrix</a></li> <li><a href="/wiki/Explicit_semantic_analysis" title="Explicit semantic analysis">Explicit semantic analysis</a></li> <li><a href="/wiki/FastText" title="FastText">fastText</a></li> <li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li> <li><a href="/wiki/Language_model" title="Language model">Language model</a> (<a href="/wiki/Large_language_model" title="Large language model">large</a>)</li> <li><a href="/wiki/Latent_semantic_analysis" title="Latent semantic analysis">Latent semantic analysis</a></li> <li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li> <li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li> <li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Language_resource" title="Language resource">Language resources</a>,<br />datasets and corpora</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Types and<br />standards</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Corpus_linguistics" title="Corpus linguistics">Corpus linguistics</a></li> <li><a href="/wiki/Lexical_resource" title="Lexical resource">Lexical resource</a></li> <li><a href="/wiki/Linguistic_Linked_Open_Data" title="Linguistic Linked Open Data">Linguistic Linked Open Data</a></li> <li><a href="/wiki/Machine-readable_dictionary" title="Machine-readable dictionary">Machine-readable dictionary</a></li> <li><a href="/wiki/Parallel_text" title="Parallel text">Parallel text</a></li> <li><a href="/wiki/PropBank" title="PropBank">PropBank</a></li> <li><a href="/wiki/Semantic_network" title="Semantic network">Semantic network</a></li> <li><a href="/wiki/Simple_Knowledge_Organization_System" title="Simple Knowledge Organization System">Simple Knowledge Organization System</a></li> <li><a href="/wiki/Speech_corpus" title="Speech corpus">Speech corpus</a></li> <li><a href="/wiki/Text_corpus" title="Text corpus">Text corpus</a></li> <li><a href="/wiki/Thesaurus_(information_retrieval)" title="Thesaurus (information retrieval)">Thesaurus (information retrieval)</a></li> <li><a href="/wiki/Treebank" title="Treebank">Treebank</a></li> <li><a href="/wiki/Universal_Dependencies" title="Universal Dependencies">Universal Dependencies</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Data</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/BabelNet" title="BabelNet">BabelNet</a></li> <li><a href="/wiki/Bank_of_English" title="Bank of English">Bank of English</a></li> <li><a href="/wiki/DBpedia" title="DBpedia">DBpedia</a></li> <li><a href="/wiki/FrameNet" title="FrameNet">FrameNet</a></li> <li><a href="/wiki/Google_Ngram_Viewer" class="mw-redirect" title="Google Ngram Viewer">Google Ngram Viewer</a></li> <li><a href="/wiki/UBY" title="UBY">UBY</a></li> <li><a href="/wiki/WordNet" title="WordNet">WordNet</a></li> <li><a href="/wiki/Wikidata" title="Wikidata">Wikidata</a></li></ul> </div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Automatic_identification_and_data_capture" title="Automatic identification and data capture">Automatic identification<br />and data capture</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a></li> <li><a href="/wiki/Speech_segmentation" title="Speech segmentation">Speech segmentation</a></li> <li><a href="/wiki/Speech_synthesis" title="Speech synthesis">Speech synthesis</a></li> <li><a href="/wiki/Natural_language_generation" title="Natural language generation">Natural language generation</a></li> <li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">Optical character recognition</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Topic_model" title="Topic model">Topic model</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a class="mw-selflink selflink">Document classification</a></li> <li><a href="/wiki/Latent_Dirichlet_allocation" title="Latent Dirichlet allocation">Latent Dirichlet allocation</a></li> <li><a href="/wiki/Pachinko_allocation" title="Pachinko allocation">Pachinko allocation</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Computer-assisted_reviewing" title="Computer-assisted reviewing">Computer-assisted<br />reviewing</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Automated_essay_scoring" title="Automated essay scoring">Automated essay scoring</a></li> <li><a href="/wiki/Concordancer" title="Concordancer">Concordancer</a></li> <li><a href="/wiki/Grammar_checker" title="Grammar checker">Grammar checker</a></li> <li><a href="/wiki/Predictive_text" title="Predictive text">Predictive text</a></li> <li><a href="/wiki/Pronunciation_assessment" title="Pronunciation assessment">Pronunciation assessment</a></li> <li><a href="/wiki/Spell_checker" title="Spell checker">Spell checker</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Natural-language_user_interface" title="Natural-language user interface">Natural language<br />user interface</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Chatbot" title="Chatbot">Chatbot</a></li> <li><a href="/wiki/Interactive_fiction" title="Interactive fiction">Interactive fiction</a> (c.f. <a href="/wiki/Syntax_guessing" class="mw-redirect" title="Syntax guessing">Syntax guessing</a>)</li> <li><a href="/wiki/Question_answering" title="Question answering">Question answering</a></li> <li><a href="/wiki/Virtual_assistant" title="Virtual assistant">Virtual assistant</a></li> <li><a href="/wiki/Voice_user_interface" title="Voice user interface">Voice user interface</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Related</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Formal_semantics_(natural_language)" title="Formal semantics (natural language)">Formal semantics</a></li> <li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li> <li><a href="/wiki/Natural_Language_Toolkit" title="Natural Language Toolkit">Natural Language Toolkit</a></li> <li><a href="/wiki/SpaCy" title="SpaCy">spaCy</a></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.codfw.main‐f69cdc8f6‐f4smn Cached time: 20241122172231 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.284 seconds Real time usage: 0.379 seconds Preprocessor visited node count: 953/1000000 Post‐expand include size: 38815/2097152 bytes Template argument size: 645/2097152 bytes Highest expansion depth: 9/100 Expensive parser function count: 1/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 34464/5000000 bytes Lua time usage: 0.166/10.000 seconds Lua memory usage: 4156552/52428800 bytes Number of Wikibase entities loaded: 0/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 320.573 1 -total 42.28% 135.527 1 Template:Reflist 25.77% 82.606 2 Template:Cite_web 25.23% 80.882 1 Template:Natural_Language_Processing 24.39% 78.186 1 Template:Short_description 23.29% 74.668 3 Template:Navbox 12.13% 38.875 2 Template:Pagetype 8.73% 27.975 4 Template:Main_other 3.89% 12.473 5 Template:Webarchive 3.65% 11.716 1 Template:SDcat --> <!-- Saved in parser cache with key enwiki:pcache:idhash:1331441-0!canonical and timestamp 20241122172231 and revision id 1222174881. Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Document_classification&amp;oldid=1222174881">https://en.wikipedia.org/w/index.php?title=Document_classification&amp;oldid=1222174881</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Data_mining" title="Category:Data mining">Data mining</a></li><li><a href="/wiki/Category:Information_science" title="Category:Information science">Information science</a></li><li><a href="/wiki/Category:Knowledge_representation" title="Category:Knowledge representation">Knowledge representation</a></li><li><a href="/wiki/Category:Machine_learning" title="Category:Machine learning">Machine learning</a></li><li><a href="/wiki/Category:Natural_language_processing" title="Category:Natural language processing">Natural language processing</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Webarchive_template_wayback_links" title="Category:Webarchive template wayback links">Webarchive template wayback links</a></li><li><a href="/wiki/Category:CS1_maint:_location_missing_publisher" title="Category:CS1 maint: location missing publisher">CS1 maint: location missing publisher</a></li><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_is_different_from_Wikidata" title="Category:Short description is different from Wikidata">Short description is different from Wikidata</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 4 May 2024, at 10:53<span class="anonymous-show">&#160;(UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Document_classification&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.canary-84779d6bf6-nb2j4","wgBackendResponseTime":119,"wgPageParseReport":{"limitreport":{"cputime":"0.284","walltime":"0.379","ppvisitednodes":{"value":953,"limit":1000000},"postexpandincludesize":{"value":38815,"limit":2097152},"templateargumentsize":{"value":645,"limit":2097152},"expansiondepth":{"value":9,"limit":100},"expensivefunctioncount":{"value":1,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":34464,"limit":5000000},"entityaccesscount":{"value":0,"limit":400},"timingprofile":["100.00% 320.573 1 -total"," 42.28% 135.527 1 Template:Reflist"," 25.77% 82.606 2 Template:Cite_web"," 25.23% 80.882 1 Template:Natural_Language_Processing"," 24.39% 78.186 1 Template:Short_description"," 23.29% 74.668 3 Template:Navbox"," 12.13% 38.875 2 Template:Pagetype"," 8.73% 27.975 4 Template:Main_other"," 3.89% 12.473 5 Template:Webarchive"," 3.65% 11.716 1 Template:SDcat"]},"scribunto":{"limitreport-timeusage":{"value":"0.166","limit":"10.000"},"limitreport-memusage":{"value":4156552,"limit":52428800}},"cachereport":{"origin":"mw-web.codfw.main-f69cdc8f6-f4smn","timestamp":"20241122172231","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Document classification","url":"https:\/\/en.wikipedia.org\/wiki\/Document_classification","sameAs":"http:\/\/www.wikidata.org\/entity\/Q302088","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q302088","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2004-12-27T10:37:21Z","dateModified":"2024-05-04T10:53:27Z","headline":"problem in library science, information science and computer science"}</script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10