<!-- CINXE.COM
Apache Nutch - Wikipedia -->
<!DOCTYPE html>
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available" lang="en" dir="ltr">
<head>
<meta charset="UTF-8">
<title>Apache Nutch - Wikipedia</title>
<!--
  Inline bootstrap, part 1.
  The function below starts from the root element's class list with
  "client-js" in place of "client-nojs". If the enwikimwclientpreferences
  cookie is present, its value is split on %2C and each stored preference
  replaces the matching "clientpref" class token. The result is assigned to
  document.documentElement.className.
  RLCONF, RLSTATE and RLPAGEMODULES are then assigned as globals. They are
  presumably read by the startup module requested further down; that consumer
  is not visible in this part of the file.
  Keep every quoted string on one physical line: an unescaped line break
  inside a JavaScript string literal is a syntax error and stops this entire
  script from running.
-->
<script>
(function(){
var className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-toc-available";
var cookie=document.cookie.match(/(?:^|; )enwikimwclientpreferences=([^;]+)/);
if(cookie){cookie[1].split('%2C').forEach(function(pref){className=className.replace(new RegExp('(^| )'+pref.replace(/-clientpref-\w+$|[^\w-]+/g,'')+'-clientpref-\\w+( |$)'),'$1'+pref+'$2');});}
document.documentElement.className=className;
}());
RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy",
"wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"b9b2d7e4-4116-4c09-97d5-86a62aaf2481","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Apache_Nutch","wgTitle":"Apache Nutch","wgCurRevisionId":1245172641,"wgRevisionId":1245172641,"wgArticleId":398847,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","All articles with unsourced statements","Articles with unsourced statements from October 2015","Official website different in Wikidata and Wikipedia","Apache Software Foundation projects","Internet search engines","Free search engine software","Java (programming language) libraries","Cross-platform free software","Free web crawlers"],"wgPageViewLanguage":"en","wgPageContentLanguage":"en",
"wgPageContentModel":"wikitext","wgRelevantPageName":"Apache_Nutch","wgRelevantArticleId":398847,"wgIsProbablyEditable":true,"wgRelevantPageIsProbablyEditable":true,"wgRestrictionEdit":[],"wgRestrictionMove":[],"wgNoticeProject":"wikipedia","wgCiteReferencePreviewsActive":false,"wgFlaggedRevsParams":{"tags":{"status":{"levels":1}}},"wgMediaViewerOnClick":true,"wgMediaViewerEnabledByDefault":true,"wgPopupsFlags":0,"wgVisualEditor":{"pageLanguageCode":"en","pageLanguageDir":"ltr","pageVariantFallbacks":"en"},"wgMFDisplayWikibaseDescriptions":{"search":true,"watchlist":true,"tagline":false,"nearby":true},"wgWMESchemaEditAttemptStepOversample":false,"wgWMEPageLength":10000,"wgRelatedArticlesCompat":[],"wgEditSubmitButtonLabelPublish":true,"wgULSPosition":"interlanguage","wgULSisCompactLinksEnabled":false,"wgVector2022LanguageInHeader":true,"wgULSisLanguageSelectorEmpty":false,"wgWikibaseItemId":"Q1372248","wgCheckUserClientHintsHeadersJsApi":["brands","architecture","bitness",
"fullVersionList","mobile","model","platform","platformVersion"],"GEHomepageSuggestedEditsEnableTopics":true,"wgGETopicsMatchModeEnabled":false,"wgGEStructuredTaskRejectionReasonTextInputEnabled":false,"wgGELevelingUpEnabledForUser":false};
RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.styles":"ready","ext.globalCssJs.user":"ready","user":"ready","user.options":"loading","ext.cite.styles":"ready","skins.vector.search.codex.styles":"ready","skins.vector.styles":"ready","skins.vector.icons":"ready","jquery.tablesorter.styles":"ready","jquery.makeCollapsible.styles":"ready","ext.wikimediamessages.styles":"ready","ext.visualEditor.desktopArticleTarget.noscript":"ready","ext.uls.interlanguage":"ready","wikibase.client.init":"ready","ext.wikimediaBadges":"ready"};
RLPAGEMODULES=["ext.cite.ux-enhancements","mediawiki.page.media","site","mediawiki.page.ready","jquery.tablesorter","jquery.makeCollapsible","mediawiki.toc","skins.vector.js","ext.centralNotice.geoIP",
"ext.centralNotice.startUp","ext.gadget.ReferenceTooltips","ext.gadget.switcher","ext.urlShortener.toolbar","ext.centralauth.centralautologin","mmv.bootstrap","ext.popups","ext.visualEditor.desktopArticleTarget.init","ext.visualEditor.targetLoader","ext.echo.centralauth","ext.eventLogging","ext.wikimediaEvents","ext.navigationTiming","ext.uls.interface","ext.cx.eventlogging.campaigns","ext.cx.uls.quick.actions","wikibase.client.vector-2022","ext.checkUser.clientHints","ext.growthExperiments.SuggestedEditSession","wikibase.sidebar.tracking"];
</script>
<!--
  Inline bootstrap, part 2.
  Queues a callback on RLQ (the array is created if it does not exist yet).
  When run, the callback registers "user.options@12s5i" through
  mw.loader.impl and sets the patrol, watch and csrf tokens on mw.user.tokens.
-->
<script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return["user.options@12s5i",function($,jQuery,require,module){mw.user.tokens.set({"patrolToken":"+\\","watchToken":"+\\","csrfToken":"+\\"}); }];});});</script>
<link rel="stylesheet"
href="/w/load.php?lang=en&modules=ext.cite.styles%7Cext.uls.interlanguage%7Cext.visualEditor.desktopArticleTarget.noscript%7Cext.wikimediaBadges%7Cext.wikimediamessages.styles%7Cjquery.makeCollapsible.styles%7Cjquery.tablesorter.styles%7Cskins.vector.icons%2Cstyles%7Cskins.vector.search.codex.styles%7Cwikibase.client.init&only=styles&skin=vector-2022"> <script async="" src="/w/load.php?lang=en&modules=startup&only=scripts&raw=1&skin=vector-2022"></script> <meta name="ResourceLoaderDynamicStyles" content=""> <link rel="stylesheet" href="/w/load.php?lang=en&modules=site.styles&only=styles&skin=vector-2022"> <meta name="generator" content="MediaWiki 1.44.0-wmf.5"> <meta name="referrer" content="origin"> <meta name="referrer" content="origin-when-cross-origin"> <meta name="robots" content="max-image-preview:standard"> <meta name="format-detection" content="telephone=no"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/en/thumb/e/e0/NutchScreenshot.png/1200px-NutchScreenshot.png"> <meta property="og:image:width" content="1200"> <meta property="og:image:height" content="750"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/en/thumb/e/e0/NutchScreenshot.png/800px-NutchScreenshot.png"> <meta property="og:image:width" content="800"> <meta property="og:image:height" content="500"> <meta property="og:image" content="https://upload.wikimedia.org/wikipedia/en/thumb/e/e0/NutchScreenshot.png/640px-NutchScreenshot.png"> <meta property="og:image:width" content="640"> <meta property="og:image:height" content="400"> <meta name="viewport" content="width=1120"> <meta property="og:title" content="Apache Nutch - Wikipedia"> <meta property="og:type" content="website"> <link rel="preconnect" href="//upload.wikimedia.org"> <link rel="alternate" media="only screen and (max-width: 640px)" href="//en.m.wikipedia.org/wiki/Apache_Nutch"> <link rel="alternate" type="application/x-wiki" title="Edit this page" 
href="/w/index.php?title=Apache_Nutch&action=edit"> <link rel="apple-touch-icon" href="/static/apple-touch/wikipedia.png"> <link rel="icon" href="/static/favicon/wikipedia.ico"> <link rel="search" type="application/opensearchdescription+xml" href="/w/rest.php/v1/search" title="Wikipedia (en)"> <link rel="EditURI" type="application/rsd+xml" href="//en.wikipedia.org/w/api.php?action=rsd"> <link rel="canonical" href="https://en.wikipedia.org/wiki/Apache_Nutch"> <link rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/deed.en"> <link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&feed=atom"> <link rel="dns-prefetch" href="//meta.wikimedia.org" /> <link rel="dns-prefetch" href="//login.wikimedia.org"> </head> <body class="skin--responsive skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject mw-editable page-Apache_Nutch rootpage-Apache_Nutch skin-vector-2022 action-view"><a class="mw-jump-link" href="#bodyContent">Jump to content</a> <div class="vector-header-container"> <header class="vector-header mw-header"> <div class="vector-header-start"> <nav class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-dropdown" class="vector-dropdown vector-main-menu-dropdown vector-button-flush-left vector-button-flush-right" > <input type="checkbox" id="vector-main-menu-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-main-menu-dropdown" class="vector-dropdown-checkbox " aria-label="Main menu" > <label id="vector-main-menu-dropdown-label" for="vector-main-menu-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-menu mw-ui-icon-wikimedia-menu"></span> <span class="vector-dropdown-label-text">Main menu</span> </label> <div 
class="vector-dropdown-content"> <div id="vector-main-menu-unpinned-container" class="vector-unpinned-container"> <div id="vector-main-menu" class="vector-main-menu vector-pinnable-element"> <div class="vector-pinnable-header vector-main-menu-pinnable-header vector-pinnable-header-unpinned" data-feature-name="main-menu-pinned" data-pinnable-element-id="vector-main-menu" data-pinned-container-id="vector-main-menu-pinned-container" data-unpinned-container-id="vector-main-menu-unpinned-container" > <div class="vector-pinnable-header-label">Main menu</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-main-menu.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-main-menu.unpin">hide</button> </div> <div id="p-navigation" class="vector-menu mw-portlet mw-portlet-navigation" > <div class="vector-menu-heading"> Navigation </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-mainpage-description" class="mw-list-item"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z"><span>Main page</span></a></li><li id="n-contents" class="mw-list-item"><a href="/wiki/Wikipedia:Contents" title="Guides to browsing Wikipedia"><span>Contents</span></a></li><li id="n-currentevents" class="mw-list-item"><a href="/wiki/Portal:Current_events" title="Articles related to current events"><span>Current events</span></a></li><li id="n-randompage" class="mw-list-item"><a href="/wiki/Special:Random" title="Visit a randomly selected article [x]" accesskey="x"><span>Random article</span></a></li><li id="n-aboutsite" class="mw-list-item"><a href="/wiki/Wikipedia:About" title="Learn about Wikipedia and how it works"><span>About Wikipedia</span></a></li><li id="n-contactpage" class="mw-list-item"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us" title="How to 
contact Wikipedia"><span>Contact us</span></a></li> </ul> </div> </div> <div id="p-interaction" class="vector-menu mw-portlet mw-portlet-interaction" > <div class="vector-menu-heading"> Contribute </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="n-help" class="mw-list-item"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia"><span>Help</span></a></li><li id="n-introduction" class="mw-list-item"><a href="/wiki/Help:Introduction" title="Learn how to edit Wikipedia"><span>Learn to edit</span></a></li><li id="n-portal" class="mw-list-item"><a href="/wiki/Wikipedia:Community_portal" title="The hub for editors"><span>Community portal</span></a></li><li id="n-recentchanges" class="mw-list-item"><a href="/wiki/Special:RecentChanges" title="A list of recent changes to Wikipedia [r]" accesskey="r"><span>Recent changes</span></a></li><li id="n-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_upload_wizard" title="Add images or other media for use on Wikipedia"><span>Upload file</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> <a href="/wiki/Main_Page" class="mw-logo"> <img class="mw-logo-icon" src="/static/images/icons/wikipedia.png" alt="" aria-hidden="true" height="50" width="50"> <span class="mw-logo-container skin-invert"> <img class="mw-logo-wordmark" alt="Wikipedia" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"> <img class="mw-logo-tagline" alt="The Free Encyclopedia" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" width="117" height="13" style="width: 7.3125em; height: 0.8125em;"> </span> </a> </div> <div class="vector-header-end"> <div id="p-search" role="search" class="vector-search-box-vue vector-search-box-collapses vector-search-box-show-thumbnail vector-search-box-auto-expand-width vector-search-box"> <a href="/wiki/Special:Search" class="cdx-button cdx-button--fake-button 
cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only search-toggle" title="Search Wikipedia [f]" accesskey="f"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </a> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail cdx-typeahead-search--auto-expand-width"> <form action="/w/index.php" id="searchform" class="cdx-search-input cdx-search-input--has-end-button"> <div id="simpleSearch" class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia" aria-label="Search Wikipedia" autocapitalize="sentences" title="Search Wikipedia [f]" accesskey="f" id="searchInput" > <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <nav class="vector-user-links vector-user-links-wide" aria-label="Personal tools"> <div class="vector-user-links-main"> <div id="p-vector-user-menu-preferences" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-userpage" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-dropdown" class="vector-dropdown " title="Change the appearance of the page's font size, width, and color" > <input type="checkbox" id="vector-appearance-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-appearance-dropdown" class="vector-dropdown-checkbox " 
aria-label="Appearance" > <label id="vector-appearance-dropdown-label" for="vector-appearance-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-appearance mw-ui-icon-wikimedia-appearance"></span> <span class="vector-dropdown-label-text">Appearance</span> </label> <div class="vector-dropdown-content"> <div id="vector-appearance-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div id="p-vector-user-menu-notifications" class="vector-menu mw-portlet emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> <div id="p-vector-user-menu-overflow" class="vector-menu mw-portlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en" class=""><span>Donate</span></a> </li> <li id="pt-createaccount-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:CreateAccount&returnto=Apache+Nutch" title="You are encouraged to create an account and log in; however, it is not mandatory" class=""><span>Create account</span></a> </li> <li id="pt-login-2" class="user-links-collapsible-item mw-list-item user-links-collapsible-item"><a data-mw="interface" href="/w/index.php?title=Special:UserLogin&returnto=Apache+Nutch" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o" class=""><span>Log in</span></a> </li> </ul> </div> </div> </div> <div id="vector-user-links-dropdown" class="vector-dropdown vector-user-menu vector-button-flush-right vector-user-menu-logged-out" title="Log in and more options" > <input type="checkbox" id="vector-user-links-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-user-links-dropdown" class="vector-dropdown-checkbox " aria-label="Personal tools" > <label id="vector-user-links-dropdown-label" for="vector-user-links-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-ellipsis mw-ui-icon-wikimedia-ellipsis"></span> <span class="vector-dropdown-label-text">Personal tools</span> </label> <div class="vector-dropdown-content"> <div id="p-personal" class="vector-menu mw-portlet mw-portlet-personal user-links-collapsible-item" title="User menu" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-sitesupport" class="user-links-collapsible-item mw-list-item"><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en"><span>Donate</span></a></li><li id="pt-createaccount" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:CreateAccount&returnto=Apache+Nutch" title="You are encouraged to create an account and log in; however, it is not mandatory"><span class="vector-icon mw-ui-icon-userAdd mw-ui-icon-wikimedia-userAdd"></span> <span>Create account</span></a></li><li id="pt-login" class="user-links-collapsible-item mw-list-item"><a href="/w/index.php?title=Special:UserLogin&returnto=Apache+Nutch" title="You're encouraged to log in; however, it's not mandatory. 
[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" 
data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Features" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Features"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Features</span> </div> </a> <ul id="toc-Features-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-History" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#History"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>History</span> </div> </a> <button aria-controls="toc-History-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle History subsection</span> </button> <ul id="toc-History-sublist" class="vector-toc-list"> <li id="toc-Release_history" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Release_history"> <div class="vector-toc-text"> <span class="vector-toc-numb">2.1</span> <span>Release history</span> </div> </a> <ul id="toc-Release_history-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Scalability" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Scalability"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Scalability</span> </div> </a> <ul id="toc-Scalability-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Related_projects" 
class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Related_projects"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Related projects</span> </div> </a> <ul id="toc-Related_projects-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Search_engines_built_with_Nutch" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Search_engines_built_with_Nutch"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Search engines built with Nutch</span> </div> </a> <ul id="toc-Search_engines_built_with_Nutch-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Bibliography" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Bibliography"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>Bibliography</span> </div> </a> <ul id="toc-Bibliography-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>External links</span> </div> </a> <ul 
id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Apache Nutch</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. 
Available in 11 languages" > <label id="p-lang-btn-label" for="p-lang-btn-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-11" aria-hidden="true" ><span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span> <span class="vector-dropdown-label-text">11 languages</span> </label> <div class="vector-dropdown-content"> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Nutch" title="Nutch – Catalan" lang="ca" hreflang="ca" data-title="Nutch" data-language-autonym="Català" data-language-local-name="Catalan" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-de mw-list-item"><a href="https://de.wikipedia.org/wiki/Nutch" title="Nutch – German" lang="de" hreflang="de" data-title="Nutch" data-language-autonym="Deutsch" data-language-local-name="German" class="interlanguage-link-target"><span>Deutsch</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/Nutch" title="Nutch – Spanish" lang="es" hreflang="es" data-title="Nutch" data-language-autonym="Español" data-language-local-name="Spanish" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/Nutch" title="Nutch – French" lang="fr" hreflang="fr" data-title="Nutch" data-language-autonym="Français" data-language-local-name="French" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/%EC%95%84%ED%8C%8C%EC%B9%98_%EB%84%88%EC%B9%98" title="아파치 너치 – Korean" lang="ko" hreflang="ko" data-title="아파치 너치" 
data-language-autonym="한국어" data-language-local-name="Korean" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-it mw-list-item"><a href="https://it.wikipedia.org/wiki/Nutch" title="Nutch – Italian" lang="it" hreflang="it" data-title="Nutch" data-language-autonym="Italiano" data-language-local-name="Italian" class="interlanguage-link-target"><span>Italiano</span></a></li><li class="interlanguage-link interwiki-nl mw-list-item"><a href="https://nl.wikipedia.org/wiki/Apache_Nutch" title="Apache Nutch – Dutch" lang="nl" hreflang="nl" data-title="Apache Nutch" data-language-autonym="Nederlands" data-language-local-name="Dutch" class="interlanguage-link-target"><span>Nederlands</span></a></li><li class="interlanguage-link interwiki-pl mw-list-item"><a href="https://pl.wikipedia.org/wiki/Apache_Nutch" title="Apache Nutch – Polish" lang="pl" hreflang="pl" data-title="Apache Nutch" data-language-autonym="Polski" data-language-local-name="Polish" class="interlanguage-link-target"><span>Polski</span></a></li><li class="interlanguage-link interwiki-ru mw-list-item"><a href="https://ru.wikipedia.org/wiki/Nutch" title="Nutch – Russian" lang="ru" hreflang="ru" data-title="Nutch" data-language-autonym="Русский" data-language-local-name="Russian" class="interlanguage-link-target"><span>Русский</span></a></li><li class="interlanguage-link interwiki-tr mw-list-item"><a href="https://tr.wikipedia.org/wiki/Nutch" title="Nutch – Turkish" lang="tr" hreflang="tr" data-title="Nutch" data-language-autonym="Türkçe" data-language-local-name="Turkish" class="interlanguage-link-target"><span>Türkçe</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/Nutch" title="Nutch – Ukrainian" lang="uk" hreflang="uk" data-title="Nutch" data-language-autonym="Українська" data-language-local-name="Ukrainian" class="interlanguage-link-target"><span>Українська</span></a></li> </ul> <div 
class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q1372248#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div> </div> </div> </div> </header> <div class="vector-page-toolbar"> <div class="vector-page-toolbar-container"> <div id="left-navigation"> <nav aria-label="Namespaces"> <div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Apache_Nutch" title="View the content page [c]" accesskey="c"><span>Article</span></a></li><li id="ca-talk" class="vector-tab-noicon mw-list-item"><a href="/wiki/Talk:Apache_Nutch" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t"><span>Talk</span></a></li> </ul> </div> </div> <div id="vector-variants-dropdown" class="vector-dropdown emptyPortlet" > <input type="checkbox" id="vector-variants-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-variants-dropdown" class="vector-dropdown-checkbox " aria-label="Change language variant" > <label id="vector-variants-dropdown-label" for="vector-variants-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">English</span> </label> <div class="vector-dropdown-content"> <div id="p-variants" class="vector-menu mw-portlet mw-portlet-variants emptyPortlet" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> </ul> </div> </div> </div> </div> </nav> </div> <div id="right-navigation" class="vector-collapsible"> <nav aria-label="Views"> <div id="p-views" class="vector-menu vector-menu-tabs mw-portlet 
mw-portlet-views" > <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a href="/wiki/Apache_Nutch"><span>Read</span></a></li><li id="ca-edit" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Apache_Nutch&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-history" class="vector-tab-noicon mw-list-item"><a href="/w/index.php?title=Apache_Nutch&action=history" title="Past revisions of this page [h]" accesskey="h"><span>View history</span></a></li> </ul> </div> </div> </nav> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-dropdown" class="vector-dropdown vector-page-tools-dropdown" > <input type="checkbox" id="vector-page-tools-dropdown-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-tools-dropdown" class="vector-dropdown-checkbox " aria-label="Tools" > <label id="vector-page-tools-dropdown-label" for="vector-page-tools-dropdown-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet" aria-hidden="true" ><span class="vector-dropdown-label-text">Tools</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-tools-unpinned-container" class="vector-unpinned-container"> <div id="vector-page-tools" class="vector-page-tools vector-pinnable-element"> <div class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned" data-feature-name="page-tools-pinned" data-pinnable-element-id="vector-page-tools" data-pinned-container-id="vector-page-tools-pinned-container" data-unpinned-container-id="vector-page-tools-unpinned-container" > <div class="vector-pinnable-header-label">Tools</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to 
sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button> </div> <div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items" title="More options" > <div class="vector-menu-heading"> Actions </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Apache_Nutch"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Apache_Nutch&action=edit" title="Edit this page [e]" accesskey="e"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Apache_Nutch&action=history"><span>View history</span></a></li> </ul> </div> </div> <div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb" > <div class="vector-menu-heading"> General </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Apache_Nutch" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Apache_Nutch" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a 
href="/w/index.php?title=Apache_Nutch&oldid=1245172641" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Apache_Nutch&action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&page=Apache_Nutch&id=1245172641&wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-urlshortener" class="mw-list-item"><a href="/w/index.php?title=Special:UrlShortener&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FApache_Nutch"><span>Get shortened URL</span></a></li><li id="t-urlshortener-qrcode" class="mw-list-item"><a href="/w/index.php?title=Special:QrCode&url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FApache_Nutch"><span>Download QR code</span></a></li> </ul> </div> </div> <div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export" > <div class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&page=Apache_Nutch&action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Apache_Nutch&printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="wb-otherproject-link wb-otherproject-commons mw-list-item"><a href="https://commons.wikimedia.org/wiki/Category:Apache_Nutch" hreflang="en"><span>Wikimedia 
Commons</span></a></li><li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q1372248" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" 
lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Open source web crawler</div> <style data-mw-deduplicate="TemplateStyles:r1257001546">.mw-parser-output .infobox-subbox{padding:0;border:none;margin:-3px;width:auto;min-width:100%;font-size:100%;clear:none;float:none;background-color:transparent}.mw-parser-output .infobox-3cols-child{margin:auto}.mw-parser-output .infobox .navbar{font-size:100%}@media screen{html.skin-theme-clientpref-night .mw-parser-output .infobox-full-data:not(.notheme)>div:not(.notheme)[style]{background:#1f1f23!important;color:#f8f9fa}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .infobox-full-data:not(.notheme) div:not(.notheme){background:#1f1f23!important;color:#f8f9fa}}@media(min-width:640px){body.skin--responsive .mw-parser-output .infobox-table{display:table!important}body.skin--responsive .mw-parser-output .infobox-table>caption{display:table-caption!important}body.skin--responsive .mw-parser-output .infobox-table>tbody{display:table-row-group}body.skin--responsive .mw-parser-output .infobox-table tr{display:table-row!important}body.skin--responsive .mw-parser-output .infobox-table th,body.skin--responsive .mw-parser-output .infobox-table td{padding-left:inherit;padding-right:inherit}}</style><table class="infobox vevent"><caption class="infobox-title summary">Apache Nutch</caption><tbody><tr><td colspan="2" class="infobox-image logo"><span class="mw-default-size" typeof="mw:File/Frameless"><a href="/wiki/File:Apache_Nutch_logo.svg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/d/d1/Apache_Nutch_logo.svg/120px-Apache_Nutch_logo.svg.png" decoding="async" width="120" height="45" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/d/d1/Apache_Nutch_logo.svg/180px-Apache_Nutch_logo.svg.png 1.5x, 
//upload.wikimedia.org/wikipedia/commons/thumb/d/d1/Apache_Nutch_logo.svg/240px-Apache_Nutch_logo.svg.png 2x" data-file-width="512" data-file-height="190" /></a></span></td></tr><tr><td colspan="2" class="infobox-image logo"><style data-mw-deduplicate="TemplateStyles:r1214851843">.mw-parser-output .hidden-begin{box-sizing:border-box;width:100%;padding:5px;border:none;font-size:95%}.mw-parser-output .hidden-title{font-weight:bold;line-height:1.6;text-align:left}.mw-parser-output .hidden-content{text-align:left}@media all and (max-width:500px){.mw-parser-output .hidden-begin{width:auto!important;clear:none!important;float:none!important}}</style><div class="hidden-begin mw-collapsible mw-collapsed" style=""><div class="hidden-title skin-nightmode-reset-color" style="background:gainsboro;text-align:center">Screenshot</div><div class="hidden-content mw-collapsible-content" style="text-align:center"> <span typeof="mw:File"><a href="/wiki/File:NutchScreenshot.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/en/thumb/e/e0/NutchScreenshot.png/250px-NutchScreenshot.png" decoding="async" width="250" height="156" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/e/e0/NutchScreenshot.png/375px-NutchScreenshot.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/e/e0/NutchScreenshot.png/500px-NutchScreenshot.png 2x" data-file-width="1280" data-file-height="800" /></a></span><div class="infobox-caption">Nutch Web Interface Search</div></div></div></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Programmer" title="Programmer">Original author(s)</a></th><td class="infobox-data"><a href="/wiki/Doug_Cutting" title="Doug Cutting">Doug Cutting</a>, <a href="/wiki/Mike_Cafarella" title="Mike Cafarella">Mike Cafarella</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Programmer" title="Programmer">Developer(s)</a></th><td 
class="infobox-data"><a href="/wiki/Apache_Software_Foundation" class="mw-redirect" title="Apache Software Foundation">Apache Software Foundation</a></td></tr><tr style="display: none;"><td colspan="2" class="infobox-full-data"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1257001546"></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_release_life_cycle" title="Software release life cycle">Stable release</a></th><td class="infobox-data"><div style="margin:0px;"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1257001546"><table class="infobox-subbox"><tbody><tr><th scope="row" class="infobox-label">1.x</th><td class="infobox-data">1.20 / 24 April 2024<span class="noprint">; 7 months ago</span><span style="display:none"> (<span class="bday dtstart published updated">2024-04-24</span>)</span><sup id="cite_ref-downloads_1-0" class="reference"><a href="#cite_note-downloads-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup></td></tr><tr><th scope="row" class="infobox-label">2.x</th><td class="infobox-data">2.4 / 11 October 2019<span class="noprint">; 5 years ago</span><span style="display:none"> (<span class="bday dtstart published updated">2019-10-11</span>)</span><sup id="cite_ref-downloads_1-1" class="reference"><a href="#cite_note-downloads-1"><span class="cite-bracket">[</span>1<span class="cite-bracket">]</span></a></sup></td></tr></tbody></table> </div></td></tr><tr style="display:none"><td colspan="2"> </td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Repository_(version_control)" title="Repository (version control)">Repository</a></th><td class="infobox-data"><span class="url"><a rel="nofollow" class="external text" href="https://github.com/apache/nutch">Nutch Github Repository</a></span></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Written 
in</th><td class="infobox-data"><a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Operating_system" title="Operating system">Operating system</a></th><td class="infobox-data"><a href="/wiki/Cross-platform" class="mw-redirect" title="Cross-platform">Cross-platform</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_categories#Categorization_approaches" title="Software categories">Type</a></th><td class="infobox-data"><a href="/wiki/Web_crawler" title="Web crawler">Web crawler</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;"><a href="/wiki/Software_license" title="Software license">License</a></th><td class="infobox-data"><a href="/wiki/Apache_License_2.0" class="mw-redirect" title="Apache License 2.0">Apache License 2.0</a></td></tr><tr><th scope="row" class="infobox-label" style="white-space: nowrap;">Website</th><td class="infobox-data"><span class="url"><a rel="nofollow" class="external text" href="https://nutch.apache.org">nutch<wbr />.apache<wbr />.org</a></span></td></tr></tbody></table> <p><b>Apache Nutch</b> is a highly extensible and scalable <a href="/wiki/Open-source_license" title="Open-source license">open source</a> <a href="/wiki/Web_crawler" title="Web crawler">web crawler</a> software project. 
</p> <meta property="mw:PageProp/toc" /> <div class="mw-heading mw-heading2"><h2 id="Features">Features</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=1" title="Edit section: Features"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-default-size" typeof="mw:File/Thumb"><a href="/wiki/File:Nutch.png" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/5/53/Nutch.png" decoding="async" width="135" height="87" class="mw-file-element" data-file-width="135" data-file-height="87" /></a><figcaption>Nutch robot mascot</figcaption></figure> <p>Nutch is coded entirely in the <a href="/wiki/Java_(programming_language)" title="Java (programming language)">Java programming language</a>, but data is written in language-independent formats. It has a highly modular architecture, allowing developers to create plug-ins for media-type parsing, data retrieval, querying and clustering. </p><p>The fetcher ("robot" or "<a href="/wiki/Web_crawler" title="Web crawler">web crawler</a>") has been written from scratch specifically for this project. </p> <div class="mw-heading mw-heading2"><h2 id="History">History</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=2" title="Edit section: History"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Nutch originated with <a href="/wiki/Doug_Cutting" title="Doug Cutting">Doug Cutting</a>, creator of both <a href="/wiki/Lucene" class="mw-redirect" title="Lucene">Lucene</a> and <a href="/wiki/Hadoop" class="mw-redirect" title="Hadoop">Hadoop</a>, and <a href="/wiki/Mike_Cafarella" title="Mike Cafarella">Mike Cafarella</a>. </p><p>In June, 2003, a successful 100-million-page demonstration system was developed. 
To meet the multi-machine processing needs of the crawl and index tasks, the Nutch project has also implemented a <a href="/wiki/MapReduce" title="MapReduce">MapReduce</a> facility and a <a href="/wiki/Distributed_file_system" class="mw-redirect" title="Distributed file system">distributed file system</a>. The two facilities have been spun out into their own subproject, called <a href="/wiki/Hadoop" class="mw-redirect" title="Hadoop">Hadoop</a>. </p><p>In January, 2005, Nutch joined the Apache Incubator, from which it graduated to become a subproject of Lucene in June of that same year. Since April, 2010, Nutch has been considered an independent, top level project of the <a href="/wiki/Apache_Software_Foundation" class="mw-redirect" title="Apache Software Foundation">Apache Software Foundation</a>.<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span class="cite-bracket">[</span>2<span class="cite-bracket">]</span></a></sup> </p><p>In February 2014 the <a href="/wiki/Common_Crawl" title="Common Crawl">Common Crawl</a> project adopted Nutch for its open, large-scale web crawl.<sup id="cite_ref-:0_3-0" class="reference"><a href="#cite_note-:0-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup> </p><p>While it was once a goal for the Nutch project to release a global large-scale web search engine, that is no longer the case.<sup class="noprint Inline-Template Template-Fact" style="white-space:nowrap;">[<i><a href="/wiki/Wikipedia:Citation_needed" title="Wikipedia:Citation needed"><span title="This claim needs references to reliable sources. 
(October 2015)">citation needed</span></a></i>]</sup> </p> <div class="mw-heading mw-heading3"><h3 id="Release_history">Release history</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=3" title="Edit section: Release history"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable sortable" style="width: 80%"> <tbody><tr> <th style="width: 5%">1.x <p>Branch </p> </th> <th style="width: 5%">2.x <p>Branch </p> </th> <th style="width: 10%">Release date </th> <th style="width: 60%" class="unsortable">Description </th></tr> <tr> <td>1.1 </td> <td> </td> <td>2010-06-06 </td> <td>This release includes several major upgrades of existing libraries (Hadoop, Solr, Tika, etc.) on which Nutch depends. Various bug fixes, and speedups (e.g., to Fetcher2) have also been included. </td></tr> <tr> <td>1.2 </td> <td> </td> <td>2010-10-24 </td> <td>This release includes several improvements (addition of parse-html as a selectable parser again, configurable per-field indexing), new features (including adding timing information to all Tool classes, and implementation of parser timeouts), and bug fixes (fixing an NPE in distributed search, fixing of XML formatting issues per Document fields). </td></tr> <tr> <td>1.3 </td> <td> </td> <td>2011-06-07 </td> <td>This release includes several improvements (improved RSS parsing support, tighter integration with Apache Tika, external parsing support, improved language identification and an order of magnitude smaller source release tarball—only about 2 MB). </td></tr> <tr> <td>1.4 </td> <td> </td> <td>2011-11-26 </td> <td>This release includes several improvements including allowing Parsers to declare support for multiple MIME types, configurable Fetcher Queue depth, Fetcher speed improvements, tighter Tika integration, and support for HTTP auth in Solr indexing. 
</td></tr> <tr> <td>1.5 </td> <td> </td> <td>2012-06-07 </td> <td>This release includes several improvements: upgrades of major components such as Tika 1.1 and Hadoop 1.0.0, improvements to LinkRank and WebGraph elements, and a number of new plugins covering blacklisting, filtering and parsing, to name a few. </td></tr> <tr> <td> </td> <td>2.0 </td> <td>2012-07-07 </td> <td>This release offers users an edition focused on large-scale crawling which builds on storage abstraction (via Apache Gora) for big data stores such as Apache Accumulo, Apache Avro, Apache Cassandra, Apache HBase, HDFS, an in-memory data store and various high-profile SQL stores. </td></tr> <tr> <td>1.5.1 </td> <td> </td> <td>2012-07-10 </td> <td>This release is a maintenance release of the popular 1.5.X mainstream version of Nutch which has been widely adopted within the community. </td></tr> <tr> <td> </td> <td>2.1 </td> <td>2012-10-05 </td> <td>This release continues to provide Nutch users with a simplified Nutch distribution building on the 2.x development drive which is growing in popularity amongst the community. As well as addressing ~20 bugs, this release also offers improved properties for better Solr configuration, upgrades to various Gora dependencies and the introduction of the option to build indexes in Elasticsearch. </td></tr> <tr> <td>1.6 </td> <td> </td> <td>2012-12-06 </td> <td>This release includes over 20 bug fixes, as many improvements, and new functionality including a new HostNormalizer, the ability to dynamically set fetchInterval by MIME-type and functional enhancements to the Indexer API including the normalization of URLs and the deletion of robots noIndex documents. Other notable improvements include the upgrade of key dependencies to Tika 1.2 and Automaton 1.11-8. 
</td></tr> <tr> <td> </td> <td>2.2 </td> <td>2013-06-08 </td> <td>This release includes over 30 bug fixes and over 25 improvements, and is the third release of the increasingly popular Nutch 2.x series. It features the inclusion of Crawler-Commons, which Nutch now utilizes for improved robots.txt parsing, and library upgrades to Apache Hadoop 1.1.1, Apache Gora 0.3, Apache Tika 1.2 and Automaton 1.11-8. </td></tr> <tr> <td>1.7 </td> <td> </td> <td>2013-06-24 </td> <td>This release includes over 20 bug fixes and as many improvements, most notably a new pluggable indexing architecture which currently supports Apache Solr and Elasticsearch. Shadowing the recent Nutch 2.2 release, parsing of robots.txt is now delegated to Crawler-Commons. Key library upgrades have been made to Apache Hadoop 1.2.0 and Apache Tika 1.3. </td></tr> <tr> <td> </td> <td>2.2.1 </td> <td>2013-07-02 </td> <td>This release includes library upgrades to Apache Hadoop 1.2.0 and Apache Tika 1.3; it is predominantly a bug fix for NUTCH-1591 - Incorrect conversion of ByteBuffer to String. </td></tr> <tr> <td>1.8 </td> <td> </td> <td>2014-03-17 </td> <td>In addition to library upgrades to Crawler Commons 0.3 and Apache Tika 1.5, this release provides over 30 bug fixes as well as 18 improvements. </td></tr> <tr> <td> </td> <td>2.3 </td> <td>2015-01-22 </td> <td>The Nutch 2.3 release now comes packaged with a self-contained Apache Wicket-based web application. 
The SQL backend for Gora has been deprecated.<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span class="cite-bracket">[</span>4<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td>1.10 </td> <td> </td> <td>2015-05-06 </td> <td>This release includes library upgrades to Tika 1.6 and provides over 46 bug fixes as well as 37 improvements and 12 new features.<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span class="cite-bracket">[</span>5<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td>1.11 </td> <td> </td> <td>2015-12-07 </td> <td>This release includes library upgrades to Hadoop 2.X and Tika 1.11, and provides over 32 bug fixes as well as 35 improvements and 14 new features.<sup id="cite_ref-6" class="reference"><a href="#cite_note-6"><span class="cite-bracket">[</span>6<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td> </td> <td>2.3.1 </td> <td>2016-01-21 </td> <td>This bug-fix release addresses around 40 issues. 
</td></tr> <tr> <td>1.12 </td> <td> </td> <td>2016-06-18 </td> <td> </td></tr> <tr> <td>1.13 </td> <td> </td> <td>2017-04-02 </td> <td> </td></tr> <tr> <td>1.14 </td> <td> </td> <td>2017-12-23 </td> <td> </td></tr> <tr> <td>1.15 </td> <td> </td> <td>2018-08-09 </td> <td> </td></tr> <tr> <td>1.16 </td> <td> </td> <td>2019-10-11 </td> <td> </td></tr> <tr> <td> </td> <td>2.4 </td> <td>2019-10-11 </td> <td>Expected to be the last release on the 2.X series, as "no committer is actively working on it".<sup id="cite_ref-7" class="reference"><a href="#cite_note-7"><span class="cite-bracket">[</span>7<span class="cite-bracket">]</span></a></sup> </td></tr> <tr> <td>1.17 </td> <td> </td> <td>2020-07-02 </td> <td> </td></tr> <tr> <td>1.18 </td> <td> </td> <td>2021-01-24 </td> <td> </td></tr> <tr> <td>1.19 </td> <td> </td> <td>2022-08-22 </td> <td> </td></tr> <tr> <td>1.20 </td> <td> </td> <td>2024-04-09 </td> <td> </td></tr></tbody></table> <div class="mw-heading mw-heading2"><h2 id="Scalability">Scalability</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=4" title="Edit section: Scalability"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>IBM Research studied the performance<sup id="cite_ref-8" class="reference"><a href="#cite_note-8"><span class="cite-bracket">[</span>8<span class="cite-bracket">]</span></a></sup> of Nutch/Lucene as part of its Commercial Scale Out (CSO) project.<sup id="cite_ref-9" class="reference"><a href="#cite_note-9"><span class="cite-bracket">[</span>9<span class="cite-bracket">]</span></a></sup> Their findings were that a <a href="/wiki/Scalability#HORIZONTAL-SCALING" title="Scalability">scale-out</a> system, such as Nutch/Lucene, could achieve a performance level on a cluster of blades that was not achievable on any <a href="/wiki/Scalability#VERTICAL-SCALING" title="Scalability">scale-up</a> computer such as the <a 
href="/wiki/POWER5" title="POWER5">POWER5</a>. </p><p>The ClueWeb09 dataset (used in e.g. <a href="/wiki/Text_Retrieval_Conference" title="Text Retrieval Conference">TREC</a>) was gathered using Nutch, with an average speed of 755.31 documents per second.<sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">[</span>10<span class="cite-bracket">]</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Related_projects">Related projects</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=5" title="Edit section: Related projects"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Hadoop" class="mw-redirect" title="Hadoop">Hadoop</a> – Java framework that supports distributed applications running on large clusters.</li></ul> <div class="mw-heading mw-heading2"><h2 id="Search_engines_built_with_Nutch">Search engines built with Nutch</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=6" title="Edit section: Search engines built with Nutch"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/Common_Crawl" title="Common Crawl">Common Crawl</a> – publicly available internet-wide crawls, started using Nutch in 2014.<sup id="cite_ref-:0_3-1" class="reference"><a href="#cite_note-:0-3"><span class="cite-bracket">[</span>3<span class="cite-bracket">]</span></a></sup></li> <li><a href="/wiki/Creative_Commons" title="Creative Commons">Creative Commons</a> Search – an implementation of Nutch, used in the period of 2004–2006.<sup id="cite_ref-11" class="reference"><a href="#cite_note-11"><span class="cite-bracket">[</span>11<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-12" class="reference"><a href="#cite_note-12"><span 
class="cite-bracket">[</span>12<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-13" class="reference"><a href="#cite_note-13"><span class="cite-bracket">[</span>13<span class="cite-bracket">]</span></a></sup></li> <li><a href="/w/index.php?title=DiscoverEd&action=edit&redlink=1" class="new" title="DiscoverEd (page does not exist)">DiscoverEd</a> – <a href="/wiki/Open_educational_resources" title="Open educational resources">Open educational resources</a> search prototype developed by Creative Commons</li> <li><a href="/wiki/Krugle" title="Krugle">Krugle</a> uses Nutch to crawl web pages for code, archives and technically interesting content.</li> <li><a href="/wiki/MozDex" title="MozDex">mozDex</a> (inactive)</li> <li><a href="/wiki/Wikia_Search" title="Wikia Search">Wikia Search</a> - launched 2008, closed down 2009<sup id="cite_ref-14" class="reference"><a href="#cite_note-14"><span class="cite-bracket">[</span>14<span class="cite-bracket">]</span></a></sup><sup id="cite_ref-15" class="reference"><a href="#cite_note-15"><span class="cite-bracket">[</span>15<span class="cite-bracket">]</span></a></sup></li></ul> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=7" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239009302">.mw-parser-output .portalbox{padding:0;margin:0.5em 0;display:table;box-sizing:border-box;max-width:175px;list-style:none}.mw-parser-output .portalborder{border:1px solid var(--border-color-base,#a2a9b1);padding:0.1em;background:var(--background-color-neutral-subtle,#f8f9fa)}.mw-parser-output .portalbox-entry{display:table-row;font-size:85%;line-height:110%;height:1.9em;font-style:italic;font-weight:bold}.mw-parser-output 
.portalbox-image{display:table-cell;padding:0.2em;vertical-align:middle;text-align:center}.mw-parser-output .portalbox-link{display:table-cell;padding:0.2em 0.2em 0.2em 0.3em;vertical-align:middle}@media(min-width:720px){.mw-parser-output .portalleft{clear:left;float:left;margin:0.5em 1em 0.5em 0}.mw-parser-output .portalright{clear:right;float:right;margin:0.5em 0 0.5em 1em}}</style><ul role="navigation" aria-label="Portals" class="noprint portalbox portalborder portalright"> <li class="portalbox-entry"><span class="portalbox-image"><span class="noviewer" typeof="mw:File"><span><img alt="" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/31/Free_and_open-source_software_logo_%282009%29.svg/28px-Free_and_open-source_software_logo_%282009%29.svg.png" decoding="async" width="28" height="28" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/31/Free_and_open-source_software_logo_%282009%29.svg/42px-Free_and_open-source_software_logo_%282009%29.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/31/Free_and_open-source_software_logo_%282009%29.svg/56px-Free_and_open-source_software_logo_%282009%29.svg.png 2x" data-file-width="512" data-file-height="512" /></span></span></span><span class="portalbox-link"><a href="/wiki/Portal:Free_and_open-source_software" title="Portal:Free and open-source software">Free and open-source software portal</a></span></li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=8" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist 
.references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist"> <div class="mw-references-wrap mw-references-columns"><ol class="references"> <li id="cite_note-downloads-1"><span class="mw-cite-backlink">^ <a href="#cite_ref-downloads_1-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-downloads_1-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1238218222">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free.id-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited.id-lock-limited a,.mw-parser-output .id-lock-registration.id-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription.id-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em 
center/12px no-repeat}body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-free a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-limited a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-registration a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .id-lock-subscription a,body:not(.skin-timeless):not(.skin-minerva) .mw-parser-output .cs1-ws-icon a{background-size:contain;padding:0 1em 0 0}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:var(--color-error,#d33)}.mw-parser-output .cs1-visible-error{color:var(--color-error,#d33)}.mw-parser-output .cs1-maint{display:none;color:#085;margin-left:0.3em}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}@media screen{.mw-parser-output .cs1-format{font-size:95%}html.skin-theme-clientpref-night .mw-parser-output .cs1-maint{color:#18911f}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911f}}</style><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nutch.apache.org/download/">"Apache Nutch™ - Downloads"</a><span class="reference-accessdate">. 
Retrieved <span class="nowrap">11 June</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Apache+Nutch%E2%84%A2+-+Downloads&rft_id=https%3A%2F%2Fnutch.apache.org%2Fdownload%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nutch.apache.org/#News">"Apache Nutch -"</a>. <i>nutch.apache.org</i>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=nutch.apache.org&rft.atitle=Apache+Nutch+-&rft_id=http%3A%2F%2Fnutch.apache.org%2F%23News&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-:0-3"><span class="mw-cite-backlink">^ <a href="#cite_ref-:0_3-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-:0_3-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://blog.commoncrawl.org/2014/02/common-crawl-move-to-nutch/">"Common Crawl's Move to Nutch – Common Crawl – Blog"</a>. <i>blog.commoncrawl.org</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">2015-10-14</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=blog.commoncrawl.org&rft.atitle=Common+Crawl%27s+Move+to+Nutch+%E2%80%93+Common+Crawl+%E2%80%93+Blog&rft_id=http%3A%2F%2Fblog.commoncrawl.org%2F2014%2F02%2Fcommon-crawl-move-to-nutch%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-4">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nutch.apache.org/#22-january-2015-nutch-23-release">"Nutch 2.3 Release"</a>. <i>Apache Nutch News</i>. The Apache Software Foundation. 22 January 2015<span class="reference-accessdate">. Retrieved <span class="nowrap">18 January</span> 2016</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=Apache+Nutch+News&rft.atitle=Nutch+2.3+Release&rft.date=2015-01-22&rft_id=http%3A%2F%2Fnutch.apache.org%2F%2322-january-2015-nutch-23-release&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680&version=12327187">"Nutch 1.10 Release Notes"</a>. <i>ASF JIRA</i>. The Apache Software Foundation. 6 May 2015<span class="reference-accessdate">. 
Retrieved <span class="nowrap">18 January</span> 2016</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=ASF+JIRA&rft.atitle=Nutch+1.10+Release+Notes&rft.date=2015-05-06&rft_id=https%3A%2F%2Fissues.apache.org%2Fjira%2Fsecure%2FReleaseNote.jspa%3FprojectId%3D10680%26version%3D12327187&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-6">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680&version=12329358">"Nutch 1.11 Release Notes"</a>. <i>ASF JIRA</i>. The Apache Software Foundation. 7 December 2015<span class="reference-accessdate">. Retrieved <span class="nowrap">18 January</span> 2016</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=unknown&rft.jtitle=ASF+JIRA&rft.atitle=Nutch+1.11+Release+Notes&rft.date=2015-12-07&rft_id=https%3A%2F%2Fissues.apache.org%2Fjira%2Fsecure%2FReleaseNote.jspa%3FprojectId%3D10680%26version%3D12329358&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-7">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation news cs1"><a rel="nofollow" class="external text" href="https://nutch.apache.org/news/legacy-nutch-news/#11-october-2019---nutch-24-release">"Nutch 2.4 Release"</a>. <i>Apache Nutch News</i>. The Apache Software Foundation. 11 October 2019<span class="reference-accessdate">. 
Retrieved <span class="nowrap">20 May</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Apache+Nutch+News&rft.atitle=Nutch+2.4+Release&rft.date=2019-10-11&rft_id=https%3A%2F%2Fnutch.apache.org%2Fnews%2Flegacy-nutch-news%2F%2311-october-2019---nutch-24-release&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-8">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://www.cecs.uci.edu/~papers/ipdps07/pdfs/SMTPS-201-paper-1.pdf">"Scalability of the Nutch search engine"</a> <span class="cs1-format">(PDF)</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Scalability+of+the+Nutch+search+engine&rft_id=http%3A%2F%2Fwww.cecs.uci.edu%2F~papers%2Fipdps07%2Fpdfs%2FSMTPS-201-paper-1.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-9"><span class="mw-cite-backlink"><b><a href="#cite_ref-9">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20081203064621/http://weather.ou.edu/~apw/projects/cso/prov_paper.pdf">"Base Operating System Provisioning and Bringup for a Commercial Supercomputer"</a> <span class="cs1-format">(PDF)</span>. 
Archived from <a rel="nofollow" class="external text" href="http://weather.ou.edu/~apw/projects/cso/prov_paper.pdf">the original</a> <span class="cs1-format">(PDF)</span> on December 3, 2008.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Base+Operating+System+Provisioning+and+Bringup+for+a+Commercial+Supercomputer&rft_id=http%3A%2F%2Fweather.ou.edu%2F~apw%2Fprojects%2Fcso%2Fprov_paper.pdf&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="http://boston.lti.cs.cmu.edu/crawler/crawlerstats.html">The Sapphire Web Crawler - Crawl Statistics</a>. Boston.lti.cs.cmu.edu (2008-10-01). Retrieved on 2013-07-21.</span> </li> <li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://creativecommons.org/weblog/entry/4388">"Our Updated Search"</a>. Creative Commons. 
2004-09-03.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Our+Updated+Search&rft.pub=Creative+Commons&rft.date=2004-09-03&rft_id=https%3A%2F%2Fcreativecommons.org%2Fweblog%2Fentry%2F4388&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-12"><span class="mw-cite-backlink"><b><a href="#cite_ref-12">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20100107065707/http://creativecommons.org/press-releases/entry/5064">"Creative Commons Unique Search Tool Now Integrated into Firefox 1.0"</a>. Creative Commons. 2004-11-22. Archived from <a rel="nofollow" class="external text" href="https://creativecommons.org/press-releases/entry/5064">the original</a> on 2010-01-07.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Creative+Commons+Unique+Search+Tool+Now+Integrated+into+Firefox+1.0&rft.pub=Creative+Commons&rft.date=2004-11-22&rft_id=https%3A%2F%2Fcreativecommons.org%2Fpress-releases%2Fentry%2F5064&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-13"><span class="mw-cite-backlink"><b><a href="#cite_ref-13">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://creativecommons.org/weblog/entry/6002">"New CC search UI"</a>. Creative Commons. 
2006-08-02.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=New+CC+search+UI&rft.pub=Creative+Commons&rft.date=2006-08-02&rft_id=https%3A%2F%2Fcreativecommons.org%2Fweblog%2Fentry%2F6002&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-14"><span class="mw-cite-backlink"><b><a href="#cite_ref-14">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://web.archive.org/web/20111104010718/http://answers.wikia.com/wiki/Where_can_I_get_the_source_code_for_Wikia_Search">"Where can I get the source code for Wikia Search?"</a>. Archived from <a rel="nofollow" class="external text" href="http://answers.wikia.com/wiki/Where_can_I_get_the_source_code_for_Wikia_Search">the original</a> on 2011-11-04<span class="reference-accessdate">. Retrieved <span class="nowrap">2010-02-12</span></span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Where+can+I+get+the+source+code+for+Wikia+Search%3F&rft_id=http%3A%2F%2Fanswers.wikia.com%2Fwiki%2FWhere_can_I_get_the_source_code_for_Wikia_Search&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> <li id="cite_note-15"><span class="mw-cite-backlink"><b><a href="#cite_ref-15">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="http://jimmywales.com/2009/03/31/update-on-wikia/">"Update on Wikia – doing more of what's working | Jimmy Wales"</a>. 
31 March 2009.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=unknown&rft.btitle=Update+on+Wikia+%E2%80%93+doing+more+of+what%27s+working+%7C+Jimmy+Wales&rft.date=2009-03-31&rft_id=http%3A%2F%2Fjimmywales.com%2F2009%2F03%2F31%2Fupdate-on-wikia%2F&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></span> </li> </ol></div></div> <div class="mw-heading mw-heading2"><h2 id="Bibliography">Bibliography</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=9" title="Edit section: Bibliography"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239549316">.mw-parser-output .refbegin{margin-bottom:0.5em}.mw-parser-output .refbegin-hanging-indents>ul{margin-left:0}.mw-parser-output .refbegin-hanging-indents>ul>li{margin-left:0;padding-left:3.2em;text-indent:-3.2em}.mw-parser-output .refbegin-hanging-indents ul,.mw-parser-output .refbegin-hanging-indents ul li{list-style:none}@media(max-width:720px){.mw-parser-output .refbegin-hanging-indents>ul>li{padding-left:1.6em;text-indent:-1.6em}}.mw-parser-output .refbegin-columns{margin-top:0.3em}.mw-parser-output .refbegin-columns ul{margin-top:0}.mw-parser-output .refbegin-columns li{page-break-inside:avoid;break-inside:avoid-column}@media screen{.mw-parser-output .refbegin{font-size:90%}}</style><div class="refbegin" style=""> <ul><li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222"><cite id="CITEREFShoberg2006" class="citation book cs1">Shoberg, J (October 26, 2006). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20091202104144/http://www.apress.com/book/view/9781590596876"><i>Building Search Applications with Lucene and Nutch</i></a> (1st ed.). <a href="/wiki/Apress" class="mw-redirect" title="Apress">Apress</a>. p. 350. 
<a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a> <a href="/wiki/Special:BookSources/978-1-59059-687-6" title="Special:BookSources/978-1-59059-687-6"><bdi>978-1-59059-687-6</bdi></a>. Archived from <a rel="nofollow" class="external text" href="http://www.apress.com/book/view/9781590596876">the original</a> on December 2, 2009<span class="reference-accessdate">. Retrieved <span class="nowrap">August 15,</span> 2009</span>.</cite><span title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Building+Search+Applications+with+Lucene+and+Nutch&rft.pages=350&rft.edition=1st&rft.pub=Apress&rft.date=2006-10-26&rft.isbn=978-1-59059-687-6&rft.aulast=Shoberg&rft.aufirst=J&rft_id=http%3A%2F%2Fwww.apress.com%2Fbook%2Fview%2F9781590596876&rfr_id=info%3Asid%2Fen.wikipedia.org%3AApache+Nutch" class="Z3988"></span></li></ul> </div> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Apache_Nutch&action=edit&section=10" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><span class="official-website"><span class="url"><a rel="nofollow" class="external text" href="https://nutch.apache.org">Official website</a></span></span></li></ul> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist 
ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1236075235">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid 
#a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output .navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}body.skin--responsive .mw-parser-output .navbox-image img{max-width:none!important}@media print{body.ns-0 .mw-parser-output .navbox{display:none!important}}</style></div><div 
role="navigation" class="navbox" aria-labelledby="The_Apache_Software_Foundation" style="padding:3px"><table class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1239400231">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}html.skin-theme-clientpref-night .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}@media(prefers-color-scheme:dark){html.skin-theme-clientpref-os .mw-parser-output .navbar li a abbr{color:var(--color-base)!important}}@media print{.mw-parser-output .navbar{display:none!important}}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Apache_Software_Foundation" title="Template:Apache Software Foundation"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Apache_Software_Foundation" title="Template talk:Apache Software Foundation"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a 
href="/wiki/Special:EditPage/Template:Apache_Software_Foundation" title="Special:EditPage/Template:Apache Software Foundation"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="The_Apache_Software_Foundation" style="font-size:114%;margin:0 4em"><a href="/wiki/The_Apache_Software_Foundation" title="The Apache Software Foundation">The Apache Software Foundation</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">Top-level<br />projects</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Accumulo" title="Apache Accumulo">Accumulo</a></li> <li><a href="/wiki/Apache_ActiveMQ" title="Apache ActiveMQ">ActiveMQ</a></li> <li><a href="/wiki/Apache_Airavata" title="Apache Airavata">Airavata</a></li> <li><a href="/wiki/Apache_Airflow" title="Apache Airflow">Airflow</a></li> <li><a href="/wiki/Apache_Allura" title="Apache Allura">Allura</a></li> <li><a href="/wiki/Apache_Ambari" class="mw-redirect" title="Apache Ambari">Ambari</a></li> <li><a href="/wiki/Apache_Ant" title="Apache Ant">Ant</a></li> <li><a href="/wiki/Apache_Aries" title="Apache Aries">Aries</a></li> <li><a href="/wiki/Apache_Arrow" title="Apache Arrow">Arrow</a></li> <li><a href="/wiki/Apache_HTTP_Server" title="Apache HTTP Server">Apache HTTP Server</a></li> <li><a href="/wiki/Apache_Portable_Runtime" title="Apache Portable Runtime">APR</a></li> <li><a href="/wiki/Apache_Avro" title="Apache Avro">Avro</a></li> <li><a href="/wiki/Apache_Axis" title="Apache Axis">Axis</a></li> <li><a href="/wiki/Apache_Axis2" title="Apache Axis2">Axis2</a></li> <li><a href="/wiki/Apache_Beam" title="Apache Beam">Beam</a></li> <li><a href="/wiki/Apache_Bloodhound" class="mw-redirect" title="Apache Bloodhound">Bloodhound</a></li> <li><a href="/wiki/Apache_Brooklyn" title="Apache Brooklyn">Brooklyn</a></li> <li><a href="/wiki/Apache_Calcite" title="Apache Calcite">Calcite</a></li> <li><a 
href="/wiki/Apache_Camel" title="Apache Camel">Camel</a></li> <li><a href="/wiki/Apache_CarbonData" title="Apache CarbonData">CarbonData</a></li> <li><a href="/wiki/Apache_Cassandra" title="Apache Cassandra">Cassandra</a></li> <li><a href="/wiki/Apache_Cayenne" title="Apache Cayenne">Cayenne</a></li> <li><a href="/wiki/Apache_CloudStack" title="Apache CloudStack">CloudStack</a></li> <li><a href="/wiki/Apache_Cocoon" title="Apache Cocoon">Cocoon</a></li> <li><a href="/wiki/Apache_Cordova" title="Apache Cordova">Cordova</a></li> <li><a href="/wiki/Apache_CouchDB" title="Apache CouchDB">CouchDB</a></li> <li><a href="/wiki/Apache_cTAKES" title="Apache cTAKES">cTAKES</a></li> <li><a href="/wiki/Apache_CXF" title="Apache CXF">CXF</a></li> <li><a href="/wiki/Apache_Derby" title="Apache Derby">Derby</a></li> <li><a href="/wiki/Apache_Directory" title="Apache Directory">Directory</a></li> <li><a href="/wiki/Apache_Drill" title="Apache Drill">Drill</a></li> <li><a href="/wiki/Apache_Druid" title="Apache Druid">Druid</a></li> <li><a href="/wiki/Apache_Empire-db" title="Apache Empire-db">Empire-db</a></li> <li><a href="/wiki/Apache_Felix" title="Apache Felix">Felix</a></li> <li><a href="/wiki/Apache_Flex" title="Apache Flex">Flex</a></li> <li><a href="/wiki/Apache_Flink" title="Apache Flink">Flink</a></li> <li><a href="/wiki/Apache_Flume" class="mw-redirect" title="Apache Flume">Flume</a></li> <li><a href="/wiki/FreeMarker" title="FreeMarker">FreeMarker</a></li> <li><a href="/wiki/Apache_Geronimo" title="Apache Geronimo">Geronimo</a></li> <li><a href="/wiki/Apache_Groovy" title="Apache Groovy">Groovy</a></li> <li><a href="/wiki/Apache_Guacamole" title="Apache Guacamole">Guacamole</a></li> <li><a href="/wiki/Apache_Gump" title="Apache Gump">Gump</a></li> <li><a href="/wiki/Apache_Hadoop" title="Apache Hadoop">Hadoop</a></li> <li><a href="/wiki/Apache_HBase" title="Apache HBase">HBase</a></li> <li><a href="/wiki/Apache_Helix" title="Apache Helix">Helix</a></li> <li><a 
href="/wiki/Apache_Hive" title="Apache Hive">Hive</a></li> <li><a href="/wiki/Apache_Iceberg" title="Apache Iceberg">Iceberg</a></li> <li><a href="/wiki/Apache_Ignite" title="Apache Ignite">Ignite</a></li> <li><a href="/wiki/Apache_Impala" title="Apache Impala">Impala</a></li> <li><a href="/wiki/Apache_Jackrabbit" title="Apache Jackrabbit">Jackrabbit</a></li> <li><a href="/wiki/Apache_James" title="Apache James">James</a></li> <li><a href="/wiki/Apache_Jena" title="Apache Jena">Jena</a></li> <li><a href="/wiki/Apache_JMeter" title="Apache JMeter">JMeter</a></li> <li><a href="/wiki/Apache_Kafka" title="Apache Kafka">Kafka</a></li> <li><a href="/wiki/Apache_Kudu" title="Apache Kudu">Kudu</a></li> <li><a href="/wiki/Apache_Kylin" title="Apache Kylin">Kylin</a></li> <li><a href="/wiki/Apache_Lucene" title="Apache Lucene">Lucene</a></li> <li><a href="/wiki/Apache_Mahout" title="Apache Mahout">Mahout</a></li> <li><a href="/wiki/Apache_Maven" title="Apache Maven">Maven</a></li> <li><a href="/wiki/Apache_MINA" title="Apache MINA">MINA</a></li> <li><a href="/wiki/Mod_perl" title="Mod perl">mod_perl</a></li> <li><a href="/wiki/Apache_MyFaces" title="Apache MyFaces">MyFaces</a></li> <li><a href="/wiki/Apache_Mynewt" title="Apache Mynewt">Mynewt</a></li> <li><a href="/wiki/Apache_NiFi" title="Apache NiFi">NiFi</a></li> <li><a href="/wiki/NetBeans" title="NetBeans">NetBeans</a></li> <li><a class="mw-selflink selflink">Nutch</a></li> <li><a href="/wiki/NuttX" title="NuttX">NuttX</a></li> <li><a href="/wiki/Apache_OFBiz" title="Apache OFBiz">OFBiz</a></li> <li><a href="/wiki/Apache_Oozie" title="Apache Oozie">Oozie</a></li> <li><a href="/wiki/Apache_OpenEJB" title="Apache OpenEJB">OpenEJB</a></li> <li><a href="/wiki/Apache_OpenJPA" title="Apache OpenJPA">OpenJPA</a></li> <li><a href="/wiki/Apache_OpenNLP" title="Apache OpenNLP">OpenNLP</a></li> <li><a href="/wiki/Apache_OpenOffice" title="Apache OpenOffice">OpenOffice</a></li> <li><a href="/wiki/Apache_ORC" title="Apache 
ORC">ORC</a></li> <li><a href="/wiki/Apache_PDFBox" title="Apache PDFBox">PDFBox</a></li> <li><a href="/wiki/Apache_Parquet" title="Apache Parquet">Parquet</a></li> <li><a href="/wiki/Apache_Phoenix" title="Apache Phoenix">Phoenix</a></li> <li><a href="/wiki/Apache_POI" title="Apache POI">POI</a></li> <li><a href="/wiki/Apache_Pig" title="Apache Pig">Pig</a></li> <li><a href="/wiki/Apache_Pinot" title="Apache Pinot">Pinot</a></li> <li><a href="/wiki/Apache_Pivot" title="Apache Pivot">Pivot</a></li> <li><a href="/wiki/Apache_Qpid" title="Apache Qpid">Qpid</a></li> <li><a href="/wiki/Apache_Roller" title="Apache Roller">Roller</a></li> <li><a href="/wiki/Apache_RocketMQ" title="Apache RocketMQ">RocketMQ</a></li> <li><a href="/wiki/Apache_Samza" title="Apache Samza">Samza</a></li> <li><a href="/wiki/Apache_Shiro" title="Apache Shiro">Shiro</a></li> <li><a href="/wiki/Apache_SINGA" title="Apache SINGA">SINGA</a></li> <li><a href="/wiki/Apache_Sling" title="Apache Sling">Sling</a></li> <li><a href="/wiki/Apache_Solr" title="Apache Solr">Solr</a></li> <li><a href="/wiki/Apache_Spark" title="Apache Spark">Spark</a></li> <li><a href="/wiki/Apache_Storm" title="Apache Storm">Storm</a></li> <li><a href="/wiki/Apache_SpamAssassin" title="Apache SpamAssassin">SpamAssassin</a></li> <li><a href="/wiki/Apache_Struts" title="Apache Struts">Struts</a> <ul><li><a href="/wiki/Apache_Struts_1" title="Apache Struts 1">1</a></li></ul></li> <li><a href="/wiki/Apache_Subversion" title="Apache Subversion">Subversion</a></li> <li><a href="/wiki/Apache_Superset" title="Apache Superset">Superset</a></li> <li><a href="/wiki/Apache_SystemDS" title="Apache SystemDS">SystemDS</a></li> <li><a href="/wiki/Apache_Tapestry" title="Apache Tapestry">Tapestry</a></li> <li><a href="/wiki/Apache_Thrift" title="Apache Thrift">Thrift</a></li> <li><a href="/wiki/Apache_Tika" title="Apache Tika">Tika</a></li> <li><a href="/wiki/Apache_TinkerPop" class="mw-redirect" title="Apache TinkerPop">TinkerPop</a></li> 
<li><a href="/wiki/Apache_Tomcat" title="Apache Tomcat">Tomcat</a></li> <li><a href="/wiki/Apache_Trafodion" class="mw-redirect" title="Apache Trafodion">Trafodion</a></li> <li><a href="/wiki/Apache_Traffic_Server" title="Apache Traffic Server">Traffic Server</a></li> <li><a href="/wiki/UIMA" title="UIMA">UIMA</a></li> <li><a href="/wiki/Apache_Velocity" title="Apache Velocity">Velocity</a></li> <li><a href="/wiki/Apache_Wicket" title="Apache Wicket">Wicket</a></li> <li><a href="/wiki/Apache_Xalan" title="Apache Xalan">Xalan</a></li> <li><a href="/wiki/Apache_Xerces" title="Apache Xerces">Xerces</a></li> <li><a href="/wiki/Apache_XMLBeans" title="Apache XMLBeans">XMLBeans</a></li> <li>Yetus</li> <li><a href="/wiki/Apache_ZooKeeper" title="Apache ZooKeeper">ZooKeeper</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Apache_Commons" title="Apache Commons">Commons</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Byte_Code_Engineering_Library" title="Byte Code Engineering Library">BCEL</a></li> <li><a href="/wiki/Bean_Scripting_Framework" title="Bean Scripting Framework">BSF</a></li> <li><a href="/wiki/Commons_Daemon" title="Commons Daemon">Daemon</a></li> <li><a href="/wiki/Apache_Jelly" title="Apache Jelly">Jelly</a></li> <li><a href="/wiki/Apache_Commons_Logging" title="Apache Commons Logging">Logging</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Incubator</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Taverna" title="Apache Taverna">Taverna</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Other projects</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> 
<ul><li><a href="/wiki/Apache_Batik" title="Apache Batik">Batik</a></li> <li><a href="/wiki/Apache_FOP_(Formatting_Objects_Processor)" class="mw-redirect" title="Apache FOP (Formatting Objects Processor)">FOP</a></li> <li><a href="/wiki/Apache_Ivy" title="Apache Ivy">Ivy</a></li> <li><a href="/wiki/Log4j" title="Log4j">Log4j</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Attic</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_Apex" title="Apache Apex">Apex</a></li> <li><a href="/wiki/AxKit" title="AxKit">AxKit</a></li> <li><a href="/wiki/Apache_Beehive" title="Apache Beehive">Beehive</a></li> <li><a href="/wiki/Apache_iBATIS" title="Apache iBATIS">iBATIS</a></li> <li><a href="/wiki/Apache_Click" title="Apache Click">Click</a></li> <li><a href="/wiki/Apache_Continuum" title="Apache Continuum">Continuum</a></li> <li><a href="/wiki/Deltacloud" title="Deltacloud">Deltacloud</a></li> <li><a href="/wiki/Etch_(protocol)" title="Etch (protocol)">Etch</a></li> <li><a href="/wiki/Apache_Giraph" title="Apache Giraph">Giraph</a></li> <li><a href="/wiki/Apache_Hama" title="Apache Hama">Hama</a></li> <li><a href="/wiki/Apache_Harmony" title="Apache Harmony">Harmony</a></li> <li><a href="/wiki/Jakarta_Project" title="Jakarta Project">Jakarta</a></li> <li><a href="/wiki/Apache_Marmotta" title="Apache Marmotta">Marmotta</a></li> <li><a href="/wiki/Apache_MXNet" title="Apache MXNet">MXNet</a></li> <li><a href="/wiki/Apache_ODE" title="Apache ODE">ODE</a></li> <li><a href="/wiki/Jini" title="Jini">River</a></li> <li><a href="/wiki/Apache_Shale" title="Apache Shale">Shale</a></li> <li><a href="/wiki/Jakarta_Slide" class="mw-redirect" title="Jakarta Slide">Slide</a></li> <li><a href="/wiki/Sqoop" title="Sqoop">Sqoop</a></li> <li><a href="/wiki/Apache_Stanbol" title="Apache Stanbol">Stanbol</a></li> <li><a href="/wiki/Apache_Tuscany" 
class="mw-redirect" title="Apache Tuscany">Tuscany</a></li> <li><a href="/wiki/Apache_Wave" class="mw-redirect" title="Apache Wave">Wave</a></li> <li><a href="/wiki/Apache_XML" title="Apache XML">XML</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Licenses</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Apache_License" title="Apache License">Apache License</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span> <b><a href="/wiki/Category:Apache_Software_Foundation" title="Category:Apache Software Foundation">Category</a></b></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235"></div><div role="navigation" class="navbox" aria-labelledby="Web_crawlers" style="padding:3px"><table class="nowraplinks mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231"><div class="navbar plainlinks hlist 
navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Web_crawlers" title="Template:Web crawlers"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Web_crawlers" title="Template talk:Web crawlers"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Web_crawlers" title="Special:EditPage/Template:Web crawlers"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Web_crawlers" style="font-size:114%;margin:0 4em"><a href="/wiki/Web_crawler" title="Web crawler">Web crawlers</a></div></th></tr><tr><td class="navbox-abovebelow" colspan="2"><div><a href="/wiki/Internet_bot" title="Internet bot">Internet bots</a> designed for <a href="/wiki/Web_crawling" class="mw-redirect" title="Web crawling">Web crawling</a> and <a href="/wiki/Web_indexing" title="Web indexing">Web indexing</a></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Active</th><td class="navbox-list-with-group navbox-list navbox-odd hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/80legs" title="80legs">80legs</a></li> <li><a href="/wiki/Bingbot" title="Bingbot">bingbot</a></li> <li><a href="/wiki/Crawljax" title="Crawljax">Crawljax</a></li> <li><a href="/wiki/Fetcher" class="mw-redirect" title="Fetcher">Fetcher</a></li> <li><a href="/wiki/Googlebot" title="Googlebot">Googlebot</a></li> <li><a href="/wiki/Heritrix" title="Heritrix">Heritrix</a></li> <li><a href="/wiki/HTTrack" title="HTTrack">HTTrack</a></li> <li><a href="/wiki/PowerMapper" title="PowerMapper">PowerMapper</a></li> <li><a href="/wiki/Wget" title="Wget">Wget</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Discontinued</th><td class="navbox-list-with-group navbox-list navbox-even hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/FAST_Crawler" class="mw-redirect" 
title="FAST Crawler">FAST Crawler</a></li> <li><a href="/wiki/Msnbot" title="Msnbot">msnbot</a></li> <li><a href="/wiki/RBSE" class="mw-redirect" title="RBSE">RBSE</a></li> <li><a href="/wiki/TkWWW_robot" class="mw-redirect" title="TkWWW robot">TkWWW robot</a></li> <li><a href="/wiki/Twiceler" class="mw-redirect" title="Twiceler">Twiceler</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Types</th><td class="navbox-list-with-group navbox-list navbox-odd hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Distributed_web_crawler" class="mw-redirect" title="Distributed web crawler">Distributed web crawler</a></li> <li><a href="/wiki/Focused_crawler" title="Focused crawler">Focused crawler</a></li></ul> </div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐api‐int.codfw.main‐849f99967d‐kkkmt Cached time: 20241124121306 Cache expiry: 2592000 Reduced expiry: false Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 0.550 seconds Real time usage: 0.718 seconds Preprocessor visited node count: 2361/1000000 Post‐expand include size: 69458/2097152 bytes Template argument size: 5737/2097152 bytes Highest expansion depth: 25/100 Expensive parser function count: 5/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 71675/5000000 bytes Lua time usage: 0.307/10.000 seconds Lua memory usage: 6559161/52428800 bytes Number of Wikibase entities loaded: 1/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 624.953 1 -total 24.25% 151.537 2 Template:Infobox 24.22% 151.351 1 Template:Reflist 23.42% 146.394 1 Template:Infobox_software 19.89% 124.322 13 Template:Cite_web 17.95% 112.186 2 Template:Navbox 17.92% 111.999 1 Template:Apache_Software_Foundation 17.75% 110.902 1 Template:Short_description 13.29% 83.079 1 Template:Infobox_software/simple 10.74% 67.109 1 Template:Multiple_releases --> <!-- Saved in parser cache with key 
enwiki:pcache:398847:|#|:idhash:canonical and timestamp 20241124121306 and revision id 1245172641. Rendering was triggered because: api-parse --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><!-- Query-string ampersands in the URLs below are escaped as &amp; so they can never be read as character references. --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1&amp;useformat=desktop" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=Apache_Nutch&amp;oldid=1245172641">https://en.wikipedia.org/w/index.php?title=Apache_Nutch&amp;oldid=1245172641</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Apache_Software_Foundation_projects" title="Category:Apache Software Foundation projects">Apache Software Foundation projects</a></li><li><a href="/wiki/Category:Internet_search_engines" title="Category:Internet search engines">Internet search engines</a></li><li><a href="/wiki/Category:Free_search_engine_software" title="Category:Free search engine software">Free search engine software</a></li><li><a href="/wiki/Category:Java_(programming_language)_libraries" title="Category:Java (programming language) libraries">Java (programming language) libraries</a></li><li><a href="/wiki/Category:Cross-platform_free_software" title="Category:Cross-platform free software">Cross-platform free software</a></li><li><a href="/wiki/Category:Free_web_crawlers" title="Category:Free web crawlers">Free web crawlers</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a 
href="/wiki/Category:Short_description_is_different_from_Wikidata" title="Category:Short description is different from Wikidata">Short description is different from Wikidata</a></li><li><a href="/wiki/Category:All_articles_with_unsourced_statements" title="Category:All articles with unsourced statements">All articles with unsourced statements</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_October_2015" title="Category:Articles with unsourced statements from October 2015">Articles with unsourced statements from October 2015</a></li><li><a href="/wiki/Category:Official_website_different_in_Wikidata_and_Wikipedia" title="Category:Official website different in Wikidata and Wikipedia">Official website different in Wikidata and Wikipedia</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 11 September 2024, at 13:01<span class="anonymous-show"> (UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. 
Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <!-- Footer links use explicit https: URLs and escape query-string ampersands as &amp;. --> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="https://en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="https://en.m.wikipedia.org/w/index.php?title=Apache_Nutch&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://wikimediafoundation.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/static/images/footer/wikimedia-button.svg" width="84" height="29" alt="Wikimedia Foundation" loading="lazy"></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><img src="/w/resources/assets/poweredby_mediawiki.svg" alt="Powered by MediaWiki" width="88" height="31" 
loading="lazy"></a></li> </ul> </footer> </div> </div> </div> <div class="vector-settings" id="p-dock-bottom"> <ul></ul> </div><script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.codfw.main-759d5489db-jx949","wgBackendResponseTime":129,"wgPageParseReport":{"limitreport":{"cputime":"0.550","walltime":"0.718","ppvisitednodes":{"value":2361,"limit":1000000},"postexpandincludesize":{"value":69458,"limit":2097152},"templateargumentsize":{"value":5737,"limit":2097152},"expansiondepth":{"value":25,"limit":100},"expensivefunctioncount":{"value":5,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":71675,"limit":5000000},"entityaccesscount":{"value":1,"limit":400},"timingprofile":["100.00% 624.953 1 -total"," 24.25% 151.537 2 Template:Infobox"," 24.22% 151.351 1 Template:Reflist"," 23.42% 146.394 1 Template:Infobox_software"," 19.89% 124.322 13 Template:Cite_web"," 17.95% 112.186 2 Template:Navbox"," 17.92% 111.999 1 Template:Apache_Software_Foundation"," 17.75% 110.902 1 Template:Short_description"," 13.29% 83.079 1 Template:Infobox_software/simple"," 10.74% 67.109 1 Template:Multiple_releases"]},"scribunto":{"limitreport-timeusage":{"value":"0.307","limit":"10.000"},"limitreport-memusage":{"value":6559161,"limit":52428800}},"cachereport":{"origin":"mw-api-int.codfw.main-849f99967d-kkkmt","timestamp":"20241124121306","ttl":2592000,"transientcontent":false}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"Apache Nutch","url":"https:\/\/en.wikipedia.org\/wiki\/Apache_Nutch","sameAs":"http:\/\/www.wikidata.org\/entity\/Q1372248","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q1372248","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, 
Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2003-12-12T17:12:10Z","dateModified":"2024-09-11T13:01:56Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/en\/e\/e0\/NutchScreenshot.png","headline":"open source web crawler software"}</script> </body> </html>