<!-- CINXE.COM -->
<!-- Apache Tika - Wikipedia -->
<!DOCTYPE html>
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<title>Apache Tika - Wikipedia</title>
<script>
/*
 * Inline bootstrap, executed before any external script.
 *
 * 1. Replaces the class list of the root element with a "client-js" variant,
 *    applying any per-feature overrides stored in the
 *    "enwikimwclientpreferences" cookie.
 * 2. Defines the globals RLCONF, RLSTATE and RLPAGEMODULES.
 *    NOTE(review): these are presumably read by the startup loader script
 *    requested further down in the head; confirm against the loader.
 */
(function(){
  // Same class list as the html tag above, except "client-nojs" becomes "client-js".
  var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";
  // The cookie value is a list of "<feature>-clientpref-<value>" entries joined by "%2C".
  var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);
  if(cookie){
    cookie[1].split('%2C').forEach(function(pref){
      // Strip the "-clientpref-<value>" suffix and any character outside [\w-]
      // to get the feature name, then swap that feature's existing class for
      // the stored preference, keeping the surrounding separators ($1 / $2).
      className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');
    });
  }
  document.documentElement.className=className;
}());
/*
 * Page configuration values.
 * Every string literal below must stay on a single line: a raw line break
 * inside a quoted JavaScript string is a syntax error that prevents this
 * entire script element from running.
 */
RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy",
"wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"4e570805-b97a-4e02-99e3-201f255cfe1c","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Apache_Tika","wgTitle":"Apache Tika","wgCurRevisionId":1237951715,"wgRevisionId":1237951715,"wgArticleId":50189796,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Apache Software Foundation projects","Java platform","Free software programmed in Java (programming language)","Java (programming language) libraries","Software using the Apache license"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Apache_Tika","wgRelevantArticleId":50189796,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,
"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":6000,"wgRelatedArticlesCompat":[],"wgCentralAuthMobileDomain":false,"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q2858088","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness","fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,
"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};
/* Module name mapped to its load state ("ready" or "loading"). */
RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};
/* List of module names for this page. */
RLPAGEMODULES=["ext.cite.ux-enhancements","site","mediawiki.page.ready","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP","ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init",
"ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];
</script>
<!-- Queues a callback on window.RLQ that registers "user.options@12s5i" and sets the three user tokens to the same "+\" value. -->
<script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script>
<link rel="stylesheet" href="/w/load.php?lang=en&modules=ext.cite.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&only=styles&skin=vector-2022">
<script async=""
src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&modules=site.styles&only=styles&skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.4"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Apache_Tika_Logo.svg/1200px-Apache_Tika_Logo.svg.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="427"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Apache_Tika_Logo.svg/800px-Apache_Tika_Logo.svg.png"> <meta property="og:image:width" content="800"> <meta property="og:image:height" content="284"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Apache_Tika_Logo.svg/640px-Apache_Tika_Logo.svg.png"> <meta property="og:image:width" content="640"> <meta property="og:image:height" content="228"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Apache Tika - Wikipedia"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Apache_Tika"> <link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Apache_Tika&action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" 
href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Apache_Tika"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Apache_Tika rootpage-Apache_Tika skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" 
data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a 
href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search 
cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance 
mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=Apache+Tika" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=Apache+Tika" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=Apache+Tika" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=Apache+Tika" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-History" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#History"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>History</span> </div> </a> <ul id="toc-History-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Features" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Features"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Features</span> </div> </a> <ul id="toc-Features-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Notable_uses" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Notable_uses"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Notable uses</span> </div> </a> <ul id="toc-Notable_uses-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>References</span> </div> </a> <ul 
id="toc-References-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Apache Tika</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. 
Available in 4 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-4" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">4 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/Apache_Tika" title="Apache Tika – French" lang="fr" hreflang="fr" data-title="Apache Tika" data-language-autonym="Français" data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-it mw-list-item"><a href="https://it.wikipedia.org/wiki/Apache_Tika" title="Apache Tika – Italian" lang="it" hreflang="it" data-title="Apache Tika" data-language-autonym="Italiano" data-language-local-name="Italian" class="interlanguage-link-target"><span>Italiano</span></a></li><li class="interlanguage-link interwiki-pl mw-list-item"><a href="https://pl.wikipedia.org/wiki/Apache_Tika" title="Apache Tika – Polish" lang="pl" hreflang="pl" data-title="Apache Tika" data-language-autonym="Polski" data-language-local-name="Polish" class="interlanguage-link-target"><span>Polski</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/Apache_Tika" title="Apache Tika – Ukrainian" lang="uk" hreflang="uk" data-title="Apache Tika" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li> </ul> <div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a 
href="https://www.wikidata.org/wiki/Special:EntityPage/Q2858088#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Apache_Tika" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Apache_Tika" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" 
class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Apache_Tika"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Apache_Tika&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Apache_Tika&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" 
data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Apache_Tika"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Apache_Tika&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Apache_Tika&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Apache_Tika" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Apache_Tika" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Apache_Tika&oldid=1237951715" title="Permanent link to this revision of this page"><span>Permanent 
link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Apache_Tika&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=Apache_Tika&id=1237951715&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FApache_Tika"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FApache_Tika"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=Apache_Tika&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Apache_Tika&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="wb-otherproject-link wb-otherproject-commons mw-list-item"><a href="https://commons.wikimedia.org/wiki/Category:Apache_Tika" hreflang="en"><span>Wikimedia Commons</span></a></li><li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a 
href="https://www.wikidata.org/wiki/Special:EntityPage/Q2858088" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Open-source content 
analysis framework</div> <style data-mw-deduplicate="TemplateStyles:r1257001546">.mw-parser-output .infobox-subbox{padding:0;border:none;margin:-3px;width:auto;min-width:100%;font-size:100%;clear:none;float:none;background-color:transparent}.mw-parser-output .infobox-3cols-child{margin:auto}.mw-parser-output .infobox .navbar{font-size:100%}@media screen{html.skin-theme-clientpref-night .mw-parser-output .infobox-full-data:not(.notheme)>div:not(.notheme)[style]{background:#1f1f23!important;color:#f8f9fa}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .infobox-full-data:not(.notheme) div:not(.notheme){background:#1f1f23!important;color:#f8f9fa}}@media(min-width:640px){body.skin--responsive .mw-parser-output .infobox-table{display:table!important}body.skin--responsive .mw-parser-output .infobox-table>caption{display:table-caption!important}body.skin--responsive .mw-parser-output .infobox-table>tbody{display:table-row-group}body.skin--responsive .mw-parser-output .infobox-table tr{display:table-row!important}body.skin--responsive .mw-parser-output .infobox-table th,body.skin--responsive .mw-parser-output .infobox-table td{padding-left:inherit;padding-right:inherit}}</style><table class="infobox vevent"><caption class="infobox-title summary">Tika</caption><tbody><tr><td colspan="2" class="infobox-image logo"><span class="mw-default-size" typeof="mw:File/Frameless"><a href="/wiki/File:Apache_Tika_Logo.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/7/74/Apache_Tika_Logo.svg/220px-Apache_Tika_Logo.svg.png" decoding="async" width="220" height="78" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/7/74/Apache_Tika_Logo.svg/330px-Apache_Tika_Logo.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/7/74/Apache_Tika_Logo.svg/440px-Apache_Tika_Logo.svg.png 2x" data-file-width="512" data-file-height="182" /></a></span></td></tr><tr><th scope="row" 
class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Programmer" title="Programmer">Developer(s)</a></th><td class="infobox-data"><a href="/wiki/Apache_Software_Foundation" class="mw-redirect" title="Apache Software Foundation">Apache Software Foundation</a></td></tr><tr style="display: none;"><td colspan="2" class="infobox-full-data"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1257001546"></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_release_life_cycle" title="Software release life cycle">Stable release</a></th><td class="infobox-data"><div style="margin:0px;">2.9.1 <span class="mw-valign-text-top" typeof="mw:File/Frameless"><a href="https://www.wikidata.org/wiki/Q2858088?uselang=en#P348" title="Edit this on Wikidata"><img alt="Edit this on Wikidata" src="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png" decoding="async" width="10" height="10" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/15px-OOjs_UI_icon_edit-ltr-progressive.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/20px-OOjs_UI_icon_edit-ltr-progressive.svg.png 2x" data-file-width="20" data-file-height="20" /></a></span> / 20 October 2023<span class="noprint">; 13 months ago</span><span style="display:none"> (<span class="bday dtstart published updated">20 October 2023</span>)</span></div></td></tr><tr style="display:none"><td colspan="2"> </td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Repository_(version_control)" title="Repository (version control)">Repository</a></th><td class="infobox-data"><span class="url"><a rel="nofollow" class="external text" href="https://gitbox.apache.org/repos/asf?p=tika.git">Tika Repository</a></span></td></tr><tr><th scope="row" 
class="infobox-label" style="white-space: nowrap;">Written in</th><td class="infobox-data"><a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Operating_system" title="Operating system">Operating system</a></th><td class="infobox-data"><a href="/wiki/Cross-platform" class="mw-redirect" title="Cross-platform">Cross-platform</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_categories#Categorization_approaches" title="Software categories">Type</a></th><td class="infobox-data"><a href="/wiki/Search_algorithm" title="Search algorithm">Search</a> and <a href="/wiki/Index_(search_engine)" class="mw-redirect" title="Index (search engine)">index</a> <a href="/wiki/Application_programming_interface" class="mw-redirect" title="Application programming interface">API</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_license" title="Software license">License</a></th><td class="infobox-data"><a href="/wiki/Apache_License_2.0" class="mw-redirect" title="Apache License 2.0">Apache License 2.0</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Website</th><td class="infobox-data"><span class="url"><a rel="nofollow" class="external text" href="https://tika.apache.org/">tika<wbr />.apache<wbr />.org</a></span></td></tr></tbody></table> <p><b>Apache Tika</b> is a content detection and <a href="/wiki/Content_analysis" title="Content analysis">analysis</a> framework, written in <a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java</a>, stewarded at the <a href="/wiki/Apache_Software_Foundation" class="mw-redirect" title="Apache Software Foundation">Apache Software Foundation</a>.<sup id="cite_ref-1" class="reference"><a href="#cite_note-1"><span class="cite-bracket">[</span>1<span 
class="cite-bracket">]</span></a></sup> It detects and extracts metadata and text from over a thousand different <a href="/wiki/File_type" class="mw-redirect" title="File type">file types</a>. In addition to a <a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java</a> library, it has server and command-line editions suitable for use from other programming languages. </p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="History">History</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Tika&action=edit&section=1" title="Edit section: History"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>The project originated as part of the <a href="/wiki/Apache_Nutch" title="Apache Nutch">Apache Nutch</a> codebase, to provide content identification and extraction when <a href="/wiki/Web_crawlers" class="mw-redirect" title="Web crawlers">crawling</a>. In 2007, it was separated out, to make it more extensible and usable by <a href="/wiki/Content_management_systems" class="mw-redirect" title="Content management systems">content management systems</a>, other <a href="/wiki/Web_crawlers" class="mw-redirect" title="Web crawlers">Web crawlers</a>, and information retrieval systems. The standalone Tika was founded by Jérôme Charron, <a href="/wiki/Chris_Mattmann" title="Chris Mattmann">Chris Mattmann</a> and Jukka Zitting.<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> In 2011, Chris Mattmann and Jukka Zitting released the Manning book "Tika in Action", and the project released version 1.0. 
</p> <div class="mw-heading mw-heading2"><h2 id="Features">Features</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Tika&action=edit&section=2" title="Edit section: Features"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Tika provides capabilities for identification of more than 1400 file types from the <a href="/wiki/Internet_Assigned_Numbers_Authority" title="Internet Assigned Numbers Authority">Internet Assigned Numbers Authority</a> taxonomy of <a href="/wiki/Media_type" title="Media type">MIME types</a>. For most of the more common and popular formats,<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> Tika then provides content extraction, metadata extraction and language identification capabilities. </p><p>It can also get text from images by using the <a href="/wiki/Optical_character_recognition" title="Optical character recognition">OCR</a> software <a href="/wiki/Tesseract_(software)" title="Tesseract (software)">Tesseract</a>.<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> </p><p>While Tika is written in <a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java</a>, it is widely used from other languages.<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> The <a href="/wiki/Representational_state_transfer" class="mw-redirect" title="Representational state transfer">RESTful</a> server and <a href="/wiki/Command-line_interface" title="Command-line interface">CLI Tool</a> permit non-Java programs to access the Tika functionality. 
</p> <div class="mw-heading mw-heading2"><h2 id="Notable_uses">Notable uses</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Tika&action=edit&section=3" title="Edit section: Notable uses"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Tika is used by financial institutions including the <a href="/wiki/Fair_Isaac_Corporation" class="mw-redirect" title="Fair Isaac Corporation">Fair Isaac Corporation</a> (FICO)<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> and Goldman Sachs,<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> by <a href="/wiki/NASA" title="NASA">NASA</a> and academic researchers,<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> and by major content management systems including <a href="/wiki/Drupal" title="Drupal">Drupal</a><sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">[</span>9<span class="cite-bracket">]</span></a></sup> and <a href="/wiki/Alfresco_(software)" class="mw-redirect" title="Alfresco (software)">Alfresco</a><sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">[</span>10<span class="cite-bracket">]</span></a></sup> to analyze large amounts of content and to make it available in common formats using information retrieval techniques. 
</p><p>On April 4, 2016,<sup id="cite_ref-11" class="reference"><a href="#cite_note-11"><span class="cite-bracket">[</span>11<span class="cite-bracket">]</span></a></sup> <a href="/wiki/Forbes" title="Forbes">Forbes</a> published an article identifying Tika as one of the key technologies used by more than 400 journalists to analyze 11.5 million leaked documents that expose an international scandal involving world leaders storing money in offshore <a href="/wiki/Shell_corporation" title="Shell corporation">shell corporations</a>. The leaked documents and the project to analyze them are referred to as the <a href="/wiki/Panama_Papers" title="Panama Papers">Panama Papers</a>. </p> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Tika&action=edit&section=4" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Magic_number_(programming)#Magic_numbers_in_files" title="Magic number (programming)">Magic number</a></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Tika&action=edit&section=5" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns 
li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon 
a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://tika.apache.org/">"Apache Tika"</a><span class="reference-accessdate">. Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Apache+Tika&rft_id=http%3A%2F%2Ftika.apache.org%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://wiki.apache.org/incubator/TikaProposal">"Tika Proposal"</a><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Tika+Proposal&rft_id=http%3A%2F%2Fwiki.apache.org%2Fincubator%2FTikaProposal&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-3"><span class="mw-cite-backlink"><b><a href="#cite_ref-3">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://tika.apache.org/1.12/formats.html">"The Apache Software Foundation"</a>. <i>Apache Tika formats page</i><span class="reference-accessdate">. Retrieved <span class="nowrap">16 April</span> 2016</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Apache+Tika+formats+page&rft.atitle=The+Apache+Software+Foundation&rft_id=http%3A%2F%2Ftika.apache.org%2F1.12%2Fformats.html&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-4">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://cwiki.apache.org/confluence/display/tika/TikaOCR">"TikaOCR"</a>. Apache Tika. 2019-03-26<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2019-12-02</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=TikaOCR&rft.pub=Apache+Tika&rft.date=2019-03-26&rft_id=https%3A%2F%2Fcwiki.apache.org%2Fconfluence%2Fdisplay%2Ftika%2FTikaOCR&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://wiki.apache.org/tika/API%20Bindings%20for%20Tika">"API Bindings for Tika"</a>. Apache Tika<span class="reference-accessdate">. Retrieved <span class="nowrap">2016-04-17</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=API+Bindings+for+Tika&rft.pub=Apache+Tika&rft_id=https%3A%2F%2Fwiki.apache.org%2Ftika%2FAPI%2520Bindings%2520for%2520Tika&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20160603111240/http://www.fico.com/en/newsroom/fico-to-engage-kaggles-community-of-180000-data-scientists-to-drive-innovation-in-the-fico-analytic-cloud">"FICO to Engage Kaggle's Community of 180,000 Data Scientists to Drive Innovation in the FICO Analytic Cloud | FICO"</a>. <i>FICO | Decisions</i>. 
Archived from <a rel="nofollow" class="external text" href="https://www.fico.com/en/newsroom/fico-to-engage-kaggles-community-of-180000-data-scientists-to-drive-innovation-in-the-fico-analytic-cloud">the original</a> on 2016-06-03<span class="reference-accessdate">. Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=FICO+%7C+Decisions&rft.atitle=FICO+to+Engage+Kaggle%27s+Community+of+180%2C000+Data+Scientists+to+Drive+Innovation+in+the+FICO+Analytic+Cloud+%7C+FICO&rft_id=http%3A%2F%2Fwww.fico.com%2Fen%2Fnewsroom%2Ffico-to-engage-kaggles-community-of-180000-data-scientists-to-drive-innovation-in-the-fico-analytic-cloud&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation news cs1"><a rel="nofollow" class="external text" href="http://www.informationweek.com/software/enterprise-applications/goldman-sachs-puts-elasticsearch-to-work/d/d-id/1321778">"Goldman Sachs Puts Elasticsearch To Work - InformationWeek"</a>. <i>InformationWeek</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2017-06-21</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=InformationWeek&rft.atitle=Goldman+Sachs+Puts+Elasticsearch+To+Work+-+InformationWeek&rft_id=http%3A%2F%2Fwww.informationweek.com%2Fsoftware%2Fenterprise-applications%2Fgoldman-sachs-puts-elasticsearch-to-work%2Fd%2Fd-id%2F1321778&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://opensource.com/life/15/4/interview-annie-burgess-USC-JPL">"Studying polar data with the help of Apache Tika"</a>. <i>Opensource.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Opensource.com&rft.atitle=Studying+polar+data+with+the+help+of+Apache+Tika&rft_id=https%3A%2F%2Fopensource.com%2Flife%2F15%2F4%2Finterview-annie-burgess-USC-JPL&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.drupal.org/project/text_extract">"Text Extract for Drupal using Tika | Drupal.org"</a>. <i>www.drupal.org</i>. 30 July 2012<span class="reference-accessdate">. 
Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=www.drupal.org&rft.atitle=Text+Extract+for+Drupal+using+Tika+%7C+Drupal.org&rft.date=2012-07-30&rft_id=https%3A%2F%2Fwww.drupal.org%2Fproject%2Ftext_extract&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://wiki.alfresco.com/wiki/Content_Transformation_and_Metadata_Extraction_with_Apache_Tika">"Content Transformation and Metadata Extraction with Apache Tika - alfrescowiki"</a>. <i>wiki.alfresco.com</i>. 5 June 2015<span class="reference-accessdate">. Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=wiki.alfresco.com&rft.atitle=Content+Transformation+and+Metadata+Extraction+with+Apache+Tika+-+alfrescowiki&rft.date=2015-06-05&rft_id=https%3A%2F%2Fwiki.alfresco.com%2Fwiki%2FContent_Transformation_and_Metadata_Extraction_with_Apache_Tika&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> <li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFFox-Brewster" class="citation web cs1">Fox-Brewster, Thomas. <a rel="nofollow" class="external text" href="https://www.forbes.com/sites/thomasbrewster/2016/04/05/panama-papers-amazon-encryption-epic-leak">"From Encrypted Drives To Amazon's Cloud -- The Amazing Flight Of The Panama Papers"</a>. 
<i>Forbes</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2016-04-15</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Forbes&rft.atitle=From+Encrypted+Drives+To+Amazon%27s+Cloud+--+The+Amazing+Flight+Of+The+Panama+Papers&rft.aulast=Fox-Brewster&rft.aufirst=Thomas&rft_id=https%3A%2F%2Fwww.forbes.com%2Fsites%2Fthomasbrewster%2F2016%2F04%2F05%2Fpanama-papers-amazon-encryption-epic-leak&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Tika" class="Z3988"></span></span> </li> </ol></div></div> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output 
.hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output 
tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div role="navigation" class="navbox" aria-labelledby="The_Apache_Software_Foundation" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output 
.navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Apache_Software_Foundation" title="Template:Apache Software Foundation"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Apache_Software_Foundation" title="Template talk:Apache Software Foundation"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Apache_Software_Foundation" title="Special:EditPage/Template:Apache Software Foundation"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="The_Apache_Software_Foundation" style="font-size:114%;margin:0 4em"><a href="/wiki/The_Apache_Software_Foundation" title="The Apache Software Foundation">The Apache Software Foundation</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">Top-level<br />projects</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Accumulo" title="Apache Accumulo">Accumulo</a></li> <li><a href="/wiki/Apache_ActiveMQ" title="Apache ActiveMQ">ActiveMQ</a></li> <li><a href="/wiki/Apache_Airavata" title="Apache 
Airavata">Airavata</a></li> <li><a href="/wiki/Apache_Airflow" title="Apache Airflow">Airflow</a></li> <li><a href="/wiki/Apache_Allura" title="Apache Allura">Allura</a></li> <li><a href="/wiki/Apache_Ambari" class="mw-redirect" title="Apache Ambari">Ambari</a></li> <li><a href="/wiki/Apache_Ant" title="Apache Ant">Ant</a></li> <li><a href="/wiki/Apache_Aries" title="Apache Aries">Aries</a></li> <li><a href="/wiki/Apache_Arrow" title="Apache Arrow">Arrow</a></li> <li><a href="/wiki/Apache_HTTP_Server" title="Apache HTTP Server">Apache HTTP Server</a></li> <li><a href="/wiki/Apache_Portable_Runtime" title="Apache Portable Runtime">APR</a></li> <li><a href="/wiki/Apache_Avro" title="Apache Avro">Avro</a></li> <li><a href="/wiki/Apache_Axis" title="Apache Axis">Axis</a></li> <li><a href="/wiki/Apache_Axis2" title="Apache Axis2">Axis2</a></li> <li><a href="/wiki/Apache_Beam" title="Apache Beam">Beam</a></li> <li><a href="/wiki/Apache_Bloodhound" class="mw-redirect" title="Apache Bloodhound">Bloodhound</a></li> <li><a href="/wiki/Apache_Brooklyn" title="Apache Brooklyn">Brooklyn</a></li> <li><a href="/wiki/Apache_Calcite" title="Apache Calcite">Calcite</a></li> <li><a href="/wiki/Apache_Camel" title="Apache Camel">Camel</a></li> <li><a href="/wiki/Apache_CarbonData" title="Apache CarbonData">CarbonData</a></li> <li><a href="/wiki/Apache_Cassandra" title="Apache Cassandra">Cassandra</a></li> <li><a href="/wiki/Apache_Cayenne" title="Apache Cayenne">Cayenne</a></li> <li><a href="/wiki/Apache_CloudStack" title="Apache CloudStack">CloudStack</a></li> <li><a href="/wiki/Apache_Cocoon" title="Apache Cocoon">Cocoon</a></li> <li><a href="/wiki/Apache_Cordova" title="Apache Cordova">Cordova</a></li> <li><a href="/wiki/Apache_CouchDB" title="Apache CouchDB">CouchDB</a></li> <li><a href="/wiki/Apache_cTAKES" title="Apache cTAKES">cTAKES</a></li> <li><a href="/wiki/Apache_CXF" title="Apache CXF">CXF</a></li> <li><a href="/wiki/Apache_Derby" title="Apache Derby">Derby</a></li> 
<li><a href="/wiki/Apache_Directory" title="Apache Directory">Directory</a></li> <li><a href="/wiki/Apache_Drill" title="Apache Drill">Drill</a></li> <li><a href="/wiki/Apache_Druid" title="Apache Druid">Druid</a></li> <li><a href="/wiki/Apache_Empire-db" title="Apache Empire-db">Empire-db</a></li> <li><a href="/wiki/Apache_Felix" title="Apache Felix">Felix</a></li> <li><a href="/wiki/Apache_Flex" title="Apache Flex">Flex</a></li> <li><a href="/wiki/Apache_Flink" title="Apache Flink">Flink</a></li> <li><a href="/wiki/Apache_Flume" class="mw-redirect" title="Apache Flume">Flume</a></li> <li><a href="/wiki/FreeMarker" title="FreeMarker">FreeMarker</a></li> <li><a href="/wiki/Apache_Geronimo" title="Apache Geronimo">Geronimo</a></li> <li><a href="/wiki/Apache_Groovy" title="Apache Groovy">Groovy</a></li> <li><a href="/wiki/Apache_Guacamole" title="Apache Guacamole">Guacamole</a></li> <li><a href="/wiki/Apache_Gump" title="Apache Gump">Gump</a></li> <li><a href="/wiki/Apache_Hadoop" title="Apache Hadoop">Hadoop</a></li> <li><a href="/wiki/Apache_HBase" title="Apache HBase">HBase</a></li> <li><a href="/wiki/Apache_Helix" title="Apache Helix">Helix</a></li> <li><a href="/wiki/Apache_Hive" title="Apache Hive">Hive</a></li> <li><a href="/wiki/Apache_Iceberg" title="Apache Iceberg">Iceberg</a></li> <li><a href="/wiki/Apache_Ignite" title="Apache Ignite">Ignite</a></li> <li><a href="/wiki/Apache_Impala" title="Apache Impala">Impala</a></li> <li><a href="/wiki/Apache_Jackrabbit" title="Apache Jackrabbit">Jackrabbit</a></li> <li><a href="/wiki/Apache_James" title="Apache James">James</a></li> <li><a href="/wiki/Apache_Jena" title="Apache Jena">Jena</a></li> <li><a href="/wiki/Apache_JMeter" title="Apache JMeter">JMeter</a></li> <li><a href="/wiki/Apache_Kafka" title="Apache Kafka">Kafka</a></li> <li><a href="/wiki/Apache_Kudu" title="Apache Kudu">Kudu</a></li> <li><a href="/wiki/Apache_Kylin" title="Apache Kylin">Kylin</a></li> <li><a href="/wiki/Apache_Lucene" title="Apache 
Lucene">Lucene</a></li> <li><a href="/wiki/Apache_Mahout" title="Apache Mahout">Mahout</a></li> <li><a href="/wiki/Apache_Maven" title="Apache Maven">Maven</a></li> <li><a href="/wiki/Apache_MINA" title="Apache MINA">MINA</a></li> <li><a href="/wiki/Mod_perl" title="Mod perl">mod_perl</a></li> <li><a href="/wiki/Apache_MyFaces" title="Apache MyFaces">MyFaces</a></li> <li><a href="/wiki/Apache_Mynewt" title="Apache Mynewt">Mynewt</a></li> <li><a href="/wiki/Apache_NiFi" title="Apache NiFi">NiFi</a></li> <li><a href="/wiki/NetBeans" title="NetBeans">NetBeans</a></li> <li><a href="/wiki/Apache_Nutch" title="Apache Nutch">Nutch</a></li> <li><a href="/wiki/NuttX" title="NuttX">NuttX</a></li> <li><a href="/wiki/Apache_OFBiz" title="Apache OFBiz">OFBiz</a></li> <li><a href="/wiki/Apache_Oozie" title="Apache Oozie">Oozie</a></li> <li><a href="/wiki/Apache_OpenEJB" title="Apache OpenEJB">OpenEJB</a></li> <li><a href="/wiki/Apache_OpenJPA" title="Apache OpenJPA">OpenJPA</a></li> <li><a href="/wiki/Apache_OpenNLP" title="Apache OpenNLP">OpenNLP</a></li> <li><a href="/wiki/Apache_OpenOffice" title="Apache OpenOffice">OpenOffice</a></li> <li><a href="/wiki/Apache_ORC" title="Apache ORC">ORC</a></li> <li><a href="/wiki/Apache_PDFBox" title="Apache PDFBox">PDFBox</a></li> <li><a href="/wiki/Apache_Parquet" title="Apache Parquet">Parquet</a></li> <li><a href="/wiki/Apache_Phoenix" title="Apache Phoenix">Phoenix</a></li> <li><a href="/wiki/Apache_POI" title="Apache POI">POI</a></li> <li><a href="/wiki/Apache_Pig" title="Apache Pig">Pig</a></li> <li><a href="/wiki/Apache_Pinot" title="Apache Pinot">Pinot</a></li> <li><a href="/wiki/Apache_Pivot" title="Apache Pivot">Pivot</a></li> <li><a href="/wiki/Apache_Qpid" title="Apache Qpid">Qpid</a></li> <li><a href="/wiki/Apache_Roller" title="Apache Roller">Roller</a></li> <li><a href="/wiki/Apache_RocketMQ" title="Apache RocketMQ">RocketMQ</a></li> <li><a href="/wiki/Apache_Samza" title="Apache Samza">Samza</a></li> <li><a 
href="/wiki/Apache_Shiro" title="Apache Shiro">Shiro</a></li> <li><a href="/wiki/Apache_SINGA" title="Apache SINGA">SINGA</a></li> <li><a href="/wiki/Apache_Sling" title="Apache Sling">Sling</a></li> <li><a href="/wiki/Apache_Solr" title="Apache Solr">Solr</a></li> <li><a href="/wiki/Apache_Spark" title="Apache Spark">Spark</a></li> <li><a href="/wiki/Apache_Storm" title="Apache Storm">Storm</a></li> <li><a href="/wiki/Apache_SpamAssassin" title="Apache SpamAssassin">SpamAssassin</a></li> <li><a href="/wiki/Apache_Struts_1" title="Apache Struts 1">Struts 1</a></li> <li><a href="/wiki/Apache_Struts" title="Apache Struts">Struts 2</a></li> <li><a href="/wiki/Apache_Subversion" title="Apache Subversion">Subversion</a></li> <li><a href="/wiki/Apache_Superset" title="Apache Superset">Superset</a></li> <li><a href="/wiki/Apache_SystemDS" title="Apache SystemDS">SystemDS</a></li> <li><a href="/wiki/Apache_Tapestry" title="Apache Tapestry">Tapestry</a></li> <li><a href="/wiki/Apache_Thrift" title="Apache Thrift">Thrift</a></li> <li><a class="mw-selflink selflink">Tika</a></li> <li><a href="/wiki/Apache_TinkerPop" class="mw-redirect" title="Apache TinkerPop">TinkerPop</a></li> <li><a href="/wiki/Apache_Tomcat" title="Apache Tomcat">Tomcat</a></li> <li><a href="/wiki/Apache_Trafodion" class="mw-redirect" title="Apache Trafodion">Trafodion</a></li> <li><a href="/wiki/Apache_Traffic_Server" title="Apache Traffic Server">Traffic Server</a></li> <li><a href="/wiki/UIMA" title="UIMA">UIMA</a></li> <li><a href="/wiki/Apache_Velocity" title="Apache Velocity">Velocity</a></li> <li><a href="/wiki/Apache_Wicket" title="Apache Wicket">Wicket</a></li> <li><a href="/wiki/Apache_Xalan" title="Apache Xalan">Xalan</a></li> <li><a href="/wiki/Apache_Xerces" title="Apache Xerces">Xerces</a></li> <li><a href="/wiki/Apache_XMLBeans" title="Apache XMLBeans">XMLBeans</a></li> <li>Yetus</li> <li><a href="/wiki/Apache_ZooKeeper" title="Apache ZooKeeper">ZooKeeper</a></li></ul> 
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Apache_Commons" title="Apache Commons">Commons</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Byte_Code_Engineering_Library" title="Byte Code Engineering Library">BCEL</a></li> <li><a href="/wiki/Bean_Scripting_Framework" title="Bean Scripting Framework">BSF</a></li> <li><a href="/wiki/Commons_Daemon" title="Commons Daemon">Daemon</a></li> <li><a href="/wiki/Apache_Jelly" title="Apache Jelly">Jelly</a></li> <li><a href="/wiki/Apache_Commons_Logging" title="Apache Commons Logging">Logging</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Incubator</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Taverna" title="Apache Taverna">Taverna</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Other projects</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Batik" title="Apache Batik">Batik</a></li> <li><a href="/wiki/Apache_FOP_(Formatting_Objects_Processor)" class="mw-redirect" title="Apache FOP (Formatting Objects Processor)">FOP</a></li> <li><a href="/wiki/Apache_Ivy" title="Apache Ivy">Ivy</a></li> <li><a href="/wiki/Log4j" title="Log4j">Log4j</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Attic</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Apex" title="Apache Apex">Apex</a></li> <li><a href="/wiki/AxKit" title="AxKit">AxKit</a></li> <li><a href="/wiki/Apache_Beehive" title="Apache Beehive">Beehive</a></li> <li><a href="/wiki/Apache_iBATIS" title="Apache 
iBATIS">iBATIS</a></li> <li><a href="/wiki/Apache_Click" title="Apache Click">Click</a></li> <li><a href="/wiki/Apache_Continuum" title="Apache Continuum">Continuum</a></li> <li><a href="/wiki/Deltacloud" title="Deltacloud">Deltacloud</a></li> <li><a href="/wiki/Etch_(protocol)" title="Etch (protocol)">Etch</a></li> <li><a href="/wiki/Apache_Giraph" title="Apache Giraph">Giraph</a></li> <li><a href="/wiki/Apache_Hama" title="Apache Hama">Hama</a></li> <li><a href="/wiki/Apache_Harmony" title="Apache Harmony">Harmony</a></li> <li><a href="/wiki/Jakarta_Project" title="Jakarta Project">Jakarta</a></li> <li><a href="/wiki/Apache_Marmotta" title="Apache Marmotta">Marmotta</a></li> <li><a href="/wiki/Apache_MXNet" title="Apache MXNet">MXNet</a></li> <li><a href="/wiki/Apache_ODE" title="Apache ODE">ODE</a></li> <li><a href="/wiki/Jini" title="Jini">River</a></li> <li><a href="/wiki/Apache_Shale" title="Apache Shale">Shale</a></li> <li><a href="/wiki/Jakarta_Slide" class="mw-redirect" title="Jakarta Slide">Slide</a></li> <li><a href="/wiki/Sqoop" title="Sqoop">Sqoop</a></li> <li><a href="/wiki/Apache_Stanbol" title="Apache Stanbol">Stanbol</a></li> <li><a href="/wiki/Apache_Tuscany" class="mw-redirect" title="Apache Tuscany">Tuscany</a></li> <li><a href="/wiki/Apache_Wave" class="mw-redirect" title="Apache Wave">Wave</a></li> <li><a href="/wiki/Apache_XML" title="Apache XML">XML</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Licenses</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_License" title="Apache License">Apache License</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" 
height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <b><a href="/wiki/Category:Apache_Software_Foundation" title="Category:Apache Software Foundation">Category</a></b></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐web.codfw.main‐f69cdc8f6‐j4d7q Cached time: 20241124070412 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.436 seconds Real time usage: 0.527 seconds Preprocessor visited node count: 1436/1000000 Post‐expand include size: 48667/2097152 bytes Template argument size: 3254/2097152 bytes Highest expansion depth: 21/100 Expensive parser function count: 3/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 47702/5000000 bytes Lua time usage: 0.289/10.000 seconds Lua memory usage: 6443663/52428800 bytes Number of Wikibase entities loaded: 1/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 460.029 1 -total 38.69% 177.978 2 Template:Infobox 29.94% 137.725 1 Template:Reflist 29.18% 134.257 1 Template:Infobox_software 24.40% 112.235 10 Template:Cite_web 24.06% 110.692 1 Template:Apache_Software_Foundation 23.44% 107.845 1 Template:Navbox 22.47% 103.370 1 Template:Infobox_software/simple 15.24% 70.124 1 Template:Short_description 12.56% 57.783 2 Template:Wikidata --> <!-- Saved in parser cache with key enwiki:pcache:idhash:50189796-0!canonical and timestamp 20241124070412 and revision id 1237951715. 
Rendering was triggered because: page-view --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Apache_Tika&oldid=1237951715">https://en.wikipedia.org/w/index.php?title=Apache_Tika&oldid=1237951715</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Apache_Software_Foundation_projects" title="Category:Apache Software Foundation projects">Apache Software Foundation projects</a></li><li><a href="/wiki/Category:Java_platform" title="Category:Java platform">Java platform</a></li><li><a href="/wiki/Category:Free_software_programmed_in_Java_(programming_language)" title="Category:Free software programmed in Java (programming language)">Free software programmed in Java (programming language)</a></li><li><a href="/wiki/Category:Java_(programming_language)_libraries" title="Category:Java (programming language) libraries">Java (programming language) libraries</a></li><li><a href="/wiki/Category:Software_using_the_Apache_license" title="Category:Software using the Apache license">Software using the Apache license</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_is_different_from_Wikidata" title="Category:Short description is different from Wikidata">Short description is different from Wikidata</a></li></ul></div></div> </div> 
</main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 1 August 2024, at 09:30<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a 
href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=Apache_Tika&mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-f69cdc8f6-j4d7q","wgBackendResponseTime":672,"wgPageParseReport":{"limitreport":{"cputime":"0.436","walltime":"0.527","ppvisitednodes":{"value":1436,"limit":1000000},"postexpandincludesize":{"value":48667,"limit":2097152},"templateargumentsize":{"value":3254,"limit":2097152},"expansiondepth":{"value":21,"limit":100},"expensivefunctioncount":{"value":3,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":47702,"limit":5000000},"entityaccesscount":{"value":1,"limit":400},"timingprofile":["100.00% 460.029 1 -total"," 38.69% 177.978 2 Template:Infobox"," 29.94% 137.725 1 Template:Reflist"," 29.18% 134.257 1 Template:Infobox_software"," 24.40% 112.235 10 Template:Cite_web"," 24.06% 110.692 1 
Template:Apache_Software_Foundation"," 23.44% 107.845 1 Template:Navbox"," 22.47% 103.370 1 Template:Infobox_software/simple"," 15.24% 70.124 1 Template:Short_description"," 12.56% 57.783 2 Template:Wikidata"]},"scribunto":{"limitreport-timeusage":{"value":"0.289","limit":"10.000"},"limitreport-memusage":{"value":6443663,"limit":52428800}},"cachereport":{"origin":"mw-web.codfw.main-f69cdc8f6-j4d7q","timestamp":"20241124070412","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Apache Tika","url":"https:\/\/en.wikipedia.org\/wiki\/Apache_Tika","sameAs":"http:\/\/www.wikidata.org\/entity\/Q2858088","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q2858088","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2016-04-15T21:39:44Z","dateModified":"2024-08-01T09:30:22Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/7\/74\/Apache_Tika_Logo.svg","headline":"content detection and analysis framework"}</script> </body> </html>