<!-- CINXE.COM -->
<!-- Internet Archive: Internet Collections -->
<html> <head><script type="text/javascript" src="/_static/js/bundle-playback.js?v=HxkREWBo" charset="utf-8"></script> <script type="text/javascript" src="/_static/js/wombat.js?v=txqj7nKC" charset="utf-8"></script> <script>window.RufflePlayer=window.RufflePlayer||{};window.RufflePlayer.config={"autoplay":"on","unmuteOverlay":"hidden"};</script> <script type="text/javascript" src="/_static/js/ruffle/ruffle.js"></script> <script type="text/javascript"> __wm.init("https://web.archive.org/web"); __wm.wombat("http://archive.org:80/internet/index.html","20011204000703","https://web.archive.org/","web","/_static/", "1007424423"); </script> <link rel="stylesheet" type="text/css" href="/_static/css/banner-styles.css?v=S1zqJCYt" /> <link rel="stylesheet" type="text/css" href="/_static/css/iconochive.css?v=3PDvdIFv" /> <!-- End Wayback Rewrite JS Include --> <meta http-equiv="content-type" content="text/html;charset=iso-8859-1"> <meta name="generator" content="Microsoft FrontPage 4.0"> <title>Internet Archive: Internet Collections</title> <link href="/web/20011204000703cs_/http://archive.org/stylesheet.css" rel="styleSheet" type="text/css"> </head> <body bgcolor="#999966"> <center> <table border="0" cellpadding="1" cellspacing="0" width="717" bgcolor="#666633"> <tr> <td> <center> <table border="0" cellpadding="0" cellspacing="0" width="715" bgcolor="#ffffcc"> <tr> <td><csobj w="1001" h="103" t="Component" csref="../../Archive.org%20Site.data/Components/header.html" occur="0"><a name="top"></a> <table border="0" cellpadding="0" cellspacing="0" width="715" height="70"> <tr> <td width="14" rowspan="2"><a href="/web/20011204000703/http://archive.org/index.html"><img height="71" width="14" src="/web/20011204000703im_/http://archive.org/images/logoa.gif" border="0" alt="Internet"></a></td> <td width="70"><a href="/web/20011204000703/http://archive.org/index.html"><img height="14" width="70" src="/web/20011204000703im_/http://archive.org/images/logob.gif" border="0" 
alt="Archive"></a></td> <td width="14" rowspan="2"><img height="5" width="14" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td width="500" rowspan="2" valign="bottom"><font color="#666633"><span class="14pt"><b> <table border="0" cellpadding="0" cellspacing="0" width="617"> <tr> <td><p><img src="/web/20011204000703im_/http://archive.org/images/header.gif" alt="The Internet Archive: Building an 'Internet Library'" width="350" height="60"><br> <img height="8" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></p> </td> <td valign="top"> <div align="right"> <img height="2" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><br> <a href="/web/20011204000703/http://archive.org/index.html"><b>Home</b></a><b> <br> <img height="2" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><br> <a href="/web/20011204000703/http://archive.org/news/index.html">News</a> <br> <img height="2" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><img height="2" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><br> <a href="/web/20011204000703/http://archive.org/contacts/index.html">Contacts</a> <br> </b><img height="8" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></div> </td> </tr> </table> </b></span></font></td> </tr> <tr> <td width="70"><a href="/web/20011204000703/http://archive.org/index.html"><img height="56" width="70" src="/web/20011204000703im_/http://archive.org/images/logoc.jpg" border="0" alt="Internet Archive logo"></a></td> </tr> </table> </csobj><csobj w="1001" h="33" t="Component" csref="../../Archive.org%20Site.data/Components/level2_internet.html" occur="0"> <table border="0" cellpadding="1" cellspacing="0" bgcolor="#999966" width="715"> <tr> <td colspan="6"><img height="2" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> </tr> <tr> <td width="5"><img 
height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td width="81" bgcolor="#ffffcc"> <center> <img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><a href="index.html"><b>Internet</b></a><img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></center> </td> <td width="78"> <center> <img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><a href="/web/20011204000703/http://archive.org/movies/index.html"><font color="#ffffcc"><b>Movies</b></font></a><img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></center> </td> <td width="89"> <center> <img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><a href="/web/20011204000703/http://archive.org/arpanet/index.html"><font color="#ffffcc"><b>Arpanet</b></font></a><img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></center> </td> <td width="150"> <center> <img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"><a href="/web/20011204000703/http://archive.org/about/index.html"><font color="#ffffcc"><nobr><b>About the Archive</b></nobr></font></a><img height="5" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></center> </td> <td><img height="1" width="225" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> </tr> <tr> <td width="5"><img height="1" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td width="81" bgcolor="#ffffcc"><img height="1" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td width="78"><img height="1" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td width="89"><img height="1" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td 
width="150"><img height="1" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td><img height="1" width="5" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> </tr> </table> </csobj> <table border="0" cellpadding="0" cellspacing="0" bgcolor="#ffffcc" width="100%"> <tr height="20" valign="bottom"> <td width="15" height="20"><img height="5" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td width="583" height="20"><img height="13" width="13" src="/web/20011204000703im_/http://archive.org/images/triangle.gif"><a href="about.html">About These Collections</a><font color="#ffffcc"><b> </b></font><img height="13" width="13" src="/web/20011204000703im_/http://archive.org/images/triangle.gif"><a href="faqs.html">FAQs</a></td> </tr> </table> <p> <table border="0" cellpadding="0" cellspacing="0" width="100%"> <tr> <td width="16" valign="top"><img height="5" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td valign="top"> <h1>Internet Collections</h1> <p>The Archive’s Internet collections include <a href="#Web">World Wide Web pages</a> | <a href="#Usenet">Usenet bulletin boards</a> | <a href="#FTP">FTP sites</a> </p> <h2><a name="Web"></a>World Wide Web Pages</h2> <table border="0" cellpadding="0" cellspacing="0" width="100%"> <tr> <td valign="top"> <div align="right"> <span class="smaller">DATES:</span></div> </td> <td valign="top"><img height="5" width="10" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td valign="top">October 1996 to now</td> </tr> <tr> <td valign="top"> <div align="right"> <span class="smaller"><nobr>SIZE:</nobr></span></div> </td> <td></td> <td valign="top">100 terabytes (about 11 billion pages, text only during 1999)</td> </tr> <tr> <td valign="top"> <div align="right"> <span class="smaller">ACCESS:</span></div> </td> <td></td> <td valign="top"><a 
href="https://web.archive.org/web/20011204000703/http://web.archive.org/">Wayback Machine</a>, or see the Archive’s <a href="proposal.html">proposal form</a> </td> </tr> </table> <p></p> <div align="center"> <table border="0" cellpadding="2" cellspacing="0" width="100%" bgcolor="#CCCC99"> <tr> <td> <div align="center"> <table border="0" cellpadding="4" cellspacing="0" width="100%"> <tr> <td bgcolor="#CCCC99" colspan="3"> <p align="center"><b>- The Wayback Machine -</b> </td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3"> <p align="center"><a href="https://web.archive.org/web/20011204000703/http://web.archive.org/"><img src="/web/20011204000703im_/http://archive.org/images/wayback_logo.gif" border="0" width="235" height="83"></a><br> Surf the Web as it was</p> </td> </tr> <tr> <!-- WAYBACK POST --> <td bgcolor="#FFFFFF" colspan="3" align="center"> <form action="https://web.archive.org/web/20011204000703/http://web.archive.org/archive_request_ng" method="get"> <input type="hidden" name="collection" value="web"> <input type="text" name="url" value="http://" size="40" maxlength="256"> <input type="submit" value="Take Me Back!" name="Submit"> </form> </td> <!-- /WAYBACK POST --> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3"> The Internet Archive, working with Alexa Internet, has created the <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/">Wayback Machine</a>. The Wayback Machine makes it possible to surf pages stored in the Internet Archive's web archive. The Wayback Machine was unveiled on October 24th at the <a href="https://web.archive.org/web/20011204000703/http://www.loc.gov/">Library of Congress </a>and the University of California - Berkeley's <a href="https://web.archive.org/web/20011204000703/http://www.lib.berkeley.edu/BANC/">Bancroft Library</a>. 
Visit the Wayback Machine by entering a URL above or clicking on specific collections below.<br> </td> </tr> <tr> <td bgcolor="#CCCC99" colspan="3"> <p align="center"><b>- Special Wayback Collections -</b></td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3"><a href="https://web.archive.org/web/20011204000703/http://archive.alexa.com/collections/e2k.html"><img src="/web/20011204000703im_/http://archive.org/images/e2k_logo.gif" border="0" width="380" height="35"></a></td> </tr> <tr> <td bgcolor="#FFFFFF"> </td> <td bgcolor="#FFFFFF" colspan="2">The United States Elections of 2000 were perhaps the most controversial elections in our nation's history. This collection was commissioned by the <a href="https://web.archive.org/web/20011204000703/http://www.loc.gov/">Library of Congress</a> to archive digital materials covering the Election of 2000. Additional assistance has been provided by <a href="https://web.archive.org/web/20011204000703/http://www.alexa.com/">Alexa Internet</a> and <a href="https://web.archive.org/web/20011204000703/http://www.compaq.com/">Compaq</a>. It contains 800 gigabytes of data gathered from 8/1/2000 to 1/21/2001. 
For more information see the <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/collections/e2k/press_release.html">Press Release</a> or <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/collections/e2k/faqs.html">FAQs</a>.<br> <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/collections/e2k.html"><img height="14" src="/web/20011204000703im_/http://archive.org/images/arrow.gif" width="29" border="0"></a> <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/collections/e2k.html">Go Wayback</a></td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3"><img src="/web/20011204000703im_/http://archive.org/images/sep11_logo.gif" width="380" height="35"></td> </tr> <tr> <td bgcolor="#FFFFFF"> </td> <td bgcolor="#FFFFFF" colspan="2">The tragic events of September 11, 2001, prompted web creators around the world to respond. This collection of archived documents was commissioned by the <a href="https://web.archive.org/web/20011204000703/http://www.loc.gov/">Library of Congress</a> to preserve digital materials covering the events of September 11, 2001. Additional assistance has been provided by <a href="https://web.archive.org/web/20011204000703/http://www.alexa.com/">Alexa Internet</a>, <a href="https://web.archive.org/web/20011204000703/http://www.webarchivist.org/">webArchivist.org</a> and <a href="https://web.archive.org/web/20011204000703/http://www.pewinternet.org/">Pew Internet &amp; American Life</a>. This collection contains more than 5 terabytes of data gathered from September 11, 2001 to October 11, 2001. 
<a href="https://web.archive.org/web/20011204000703/http://september11.archive.org/"><img height="14" src="/web/20011204000703im_/http://archive.org/images/arrow.gif" width="29" border="0"></a> <a href="https://web.archive.org/web/20011204000703/http://september11.archive.org/">Go Wayback</a></td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3"><a href="https://web.archive.org/web/20011204000703/http://archive.alexa.com/collections/pioneers.html"><img src="/web/20011204000703im_/http://archive.org/images/pioneer_logo.gif" border="0" width="381" height="35"></a></td> </tr> <tr> <td bgcolor="#FFFFFF"> </td> <td bgcolor="#FFFFFF" colspan="2">The early years of the internet are a testament to the internet's diversity and ingenuity and obsession with Star Trek. This special collection highlights a handful of sites that played a role in the early internet. <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/collections/pioneers.html"><img height="14" src="/web/20011204000703im_/http://archive.org/images/arrow.gif" width="29" border="0"></a> <a href="https://web.archive.org/web/20011204000703/http://web.archive.org/collections/pioneers.html">Go Wayback</a></td> </tr> <tr> <td bgcolor="#FFFFFF"> </td> <td bgcolor="#FFFFFF" colspan="2"><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a></td> </tr> <tr> <td width="100%" bgcolor="#CCCC99" colspan="3"> <p align="center"><b>- Other Internet Collections -</b></td> </tr> <tr> <td width="100%" bgcolor="#FFFFFF" colspan="3" valign="top"><b>1996 Presidential Election Sites at the Smithsonian</b></td> </tr> <tr> <td bgcolor="#FFFFFF"> </td><td width="25%" bgcolor="#FFFFFF" valign="top"> <table width="271" height="231" cellspacing="0" cellpadding="0" border="0"> <tr height="24"> <td height="24" colspan="3" bgcolor="#cccc99"><img src="/web/20011204000703im_/http://archive.org/images/top.gif" height="24" 
width="271" border="0"></td> </tr> <tr height="200"> <td width="2" height="200"><img src="/web/20011204000703im_/http://archive.org/images/side.gif" height="200" width="2" border="0"></td> <td height="200"> <a href="https://web.archive.org/web/20011204000703/http://movie0.archive.org/96_Elections/index.htm"><img src="/web/20011204000703im_/http://archive.org/images/loop1.gif" height="200" width="267" border="0" alt="Page from a snapshot of the Web, now in the Smithsonian"></a> </td> <td width="2" height="200"><img src="/web/20011204000703im_/http://archive.org/images/side.gif" height="200" width="2" border="0"></td> </tr> <tr height="7"> <td height="7" colspan="3"><img src="/web/20011204000703im_/http://archive.org/images/bottom.gif" height="7" width="271" border="0"></td> </tr> </table> </td> <td bgcolor="#FFFFFF" valign="top"><font face="Verdana">A display at the <a href="https://web.archive.org/web/20011204000703/http://movie0.archive.org/96_Elections/index.htm">Smithsonian Institution</a> shows how presidential candidates and parties first used the Web. The display includes 1996 campaign pages for five political parties — as well as pages such as the "Steve Forbes Official Home Page" and the "Official Internet Headquarters of the [Pat] Buchanan Brigade," which were captured before some candidates dropped out of the race and scaled back or shut down their sites. 
</font> <p><font face="Verdana">The display also includes pages from the Federal Election Commission site with financial information about candidates, parties, and political action committees.</font></p> <p><a href="https://web.archive.org/web/20011204000703/http://movie0.archive.org/96_Elections/index.htm"><font face="Verdana"><span class="smaller">SEE THE SMITHSONIAN'S DISPLAY</span></font></a><font size="2"> </font>or<font size="2"> </font><a href="/web/20011204000703/http://archive.org/collections/96_election_links.html"><span class="smaller">SEE MORE '96 ELECTION SITES</span><font size="2"> </font></a>including voter advocacy and news sites, more candidates' sites, and parodies of the candidates.</td> </tr> <tr> <td width="100%" bgcolor="#FFFFFF" colspan="3" valign="top"><b><a name="Lobby_Sculpture"></a>World Wide Web 1997: 2 Terabytes in 63 Inches</b></td> </tr> <tr> <td bgcolor="#FFFFFF" valign="top"> </td> <td bgcolor="#FFFFFF" valign="top"><a href="LoC_sculpture.html"><img height="300" width="300" src="/web/20011204000703im_/http://archive.org/images/illustrations/LoC_user1.jpg" border="0" align="top" alt="Sculpture of 1997 Web snapshot in the lobby of the Library of Congress" vspace="6"></a></td> <td bgcolor="#FFFFFF" valign="top"> <p>What would a snapshot of the Web look like? Visitors passing through the lobby of the Library of Congress get the picture when they see a sculpture — a stack of computer screens and tapes housing a snapshot of the Web in early 1997 — by Alan Rath. 
The Internet Archive is proud to have part of its collections in the Library of Congress.</p> <p> Alan Rath, 1997<br> Software by Art Medlar<br> Aluminum, computer, electronics, digital tape<br> Library of Congress, Washington, DC<br> Hardware and software gift of the Internet Archive<br> Data gift of <a href="https://web.archive.org/web/20011204000703/http://www.alexa.com/">Alexa Internet</a></p> <p><a href="LoC_sculpture.html"><span class="smaller">Watch a demo of the sculpture</span></a><span class="smaller"> </span>(if you have an ISDN or faster connection)</td> </tr> <tr> <td bgcolor="#FFFFFF" valign="top"> </td> <td bgcolor="#FFFFFF" valign="top" colspan="2"><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a></td> </tr> <tr> <td width="100%" bgcolor="#CCCC99" colspan="3" valign="top"> <p align="center"><b>- Other Internet Projects -</b></td> </tr> <tr> <td width="100%" bgcolor="#FFFFFF" colspan="3" valign="top"><b>Xerox PARC Research Projects</b></td> </tr> <tr> <td bgcolor="#FFFFFF" valign="top"> </td> <td bgcolor="#FFFFFF" colspan="2" valign="top"> <p>"It Grows on Its Own Like an Ecosystem"</p> <p>The Internet Ecologies Area at Xerox’s Palo Alto Research Center is using multiple snapshots from the Internet Archive on disk — "the Web in a box" — as a kind of test tube for understanding the Web. "We see the Web as an ‘information ecology,’ where we study the relationships between people and information," says PARC researcher Jim Pitkow.</p> <p>PARC "benefited greatly" from access to the Archive’s crawls, says Pitkow’s colleague and Stanford physics professor Bernardo Huberman. 
According to Pitkow, access to the snapshots "is great for researchers because it lets them fuse traditional tools and techniques with new tools that haven’t existed before."</p> <p>Huberman describes a PARC study that produced a mathematical "law of surfing," which says that Web traffic follows predictable, regular patterns. For example, in a manifestation of the "winner take all" principle, it turns out that just a few Web sites get most of the traffic. The researchers were also able to show how deeply people delve into a typical Web site: on average, it’s about a page and a half. Huberman has also studied Internet congestion as a social dilemma, where people weigh the costs and benefits of putting up with slow traffic versus waiting until the network is less crowded.</p> <p>In a study of the topology of the Web, a Stanford graduate student working on PARC’s Internet ecology project found that any two Web sites are no more than four clicks away from each other — hard evidence that the world is smaller than it seems, on the Web at least.</p> <p>Research on this scale and of this complexity makes new thinking possible in a whole range of fields, from graph theory to sociology. 
Pitkow compares what’s happening to the Einstein-era thrust past the limitations of Newtonian physics into quantum mechanics: "The Web," he says, "requires a whole new form of understanding."</p> <p>News coverage and further information:</p> <dir> <p><b>Xerox PARC Internet Ecologies Area</b><u><br> </u><a href="https://web.archive.org/web/20011204000703/http://www.parc.xerox.com/iea">http://www.parc.xerox.com/iea</a></p> </dir> <p><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a></td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3" valign="top"><b>IBM Research Projects</b></td> </tr> <tr> <td bgcolor="#FFFFFF" valign="top"> </td> <td bgcolor="#FFFFFF" colspan="2" valign="top"> <p>"The La Brea Tar Pits of Our Age"</p> <p>Inside the building where high-performance, large-capacity storage disks were invented, researchers at IBM's Almaden Research Center are developing software that deals intelligently with large masses of data. Using a "crawl," or snapshot of the Web, from the Internet Archive, they've developed successors to Intelligent Data Miner, a program that sorts and indexes large amounts of raw data.</p> <p>The software is useful for mundane tasks like properly routing email to sales, tech support, and other departments. But IBM research associate Bruce Baumgart and his colleagues have also used it — along with a large body of data like the crawl from the Internet Archive — to find out how Web sites point to one another and form communities of common interest. Baumgart says that "unleashing" the IDM on the Archive's data reveals "clusters of activity.... 
You see what was hot, what the breaking story was, say, on a given date two years ago."</p> <p>Baumgart compares the Archive with the La Brea tar pits — a large deposit of pitch in the middle of Los Angeles, where paleontologists make important discoveries as they dig up the fossils of creatures and plants that fell into the pits during the Ice Age. As people begin to understand how projects like the Internet Archive benefit communities, Baumgart believes that demand for lasting storage media will grow — in contrast to the current market for media that last only a few years before the stored data begins to degrade.</p> <p><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a></td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3" valign="top"><b>AT&T Research Projects</b></td> </tr> <tr> <td bgcolor="#FFFFFF" valign="top"> </td> <td bgcolor="#FFFFFF" colspan="2" valign="top"> <p>Dilbert Versus Doonesbury</p> <p>When it comes to research, <a href="https://web.archive.org/web/20011204000703/http://www.research.att.com/">AT&T</a> researcher Balachander Krishnamurthy sees plenty of advantages in a library like the Internet Archive.</p> <p>First, the Archive eliminates the need for researchers to develop their own "crawlers" (software that search engines and others use to gather Web pages). This saves researchers time and expense, and without what Krishnamurthy refers to as the "mental stumbling block" of development, they can test new ideas quickly.</p> <p>For example, using a ready-made Internet library like the Archive, a researcher interested in the popularity of <a href="https://web.archive.org/web/20011204000703/http://www.dilbert.com/">dilbert.com</a> compared to <a href="https://web.archive.org/web/20011204000703/http://www.doonesbury.com/">doonesbury.com</a> could analyze links to those sites in minutes. 
(Search engines like <a href="https://web.archive.org/web/20011204000703/http://www.google.com/">Google</a> use similar methods to return high-quality search results.) By contrast, a researcher could spend weeks just developing a spider and crawling enough Web servers to get adequate data. If the researcher wanted to compare crawls over time — for example, to look at public use of government information by comparing the most-linked-to dot-gov sites over the course of an election campaign — weeks would turn into months.</p> <p>Besides, "the algorithms for good crawling aren't published, so you're better off getting someone else's crawl if you just want access to data," says Krishnamurthy, adding that "the Archive has high-quality crawls."</p> <p>Krishnamurthy emphasizes that using the Archive's collections results in other efficiencies too. Not only does easy, central access to Web data spare trouble, expense, and time for researchers — but "when fewer crawlers are at work on the Web," he says, "it reduces the load on individual site servers and on the Internet in general."</p> <p>Furthermore, searches of the nonprofit Archive's collections are anonymous, whereas other searches may not be. For example, says Krishnamurthy, "if someone did a patent search on a site hosted by a private corporation, they could potentially reveal valuable information about themselves and the topics they were interested in, maybe to a competitor."</p> <p>One of the Archive's most important benefits is a traditional one. The Archive's open access lets researchers engage in a fundamental scientific practice: replicating colleagues' experiments and performing valid, publishable comparisons of the results. And in an open, worldwide environment like the Internet, where virtually anyone can build new software, the Archive is a practical environment for testing whether the software conforms to Internet standards. 
"Suppose someone had an idea for a different compression algorithm for hypertext transfer protocol," Krishnamurthy suggests. "Is it worth it to develop it or not?" An analysis using the Archive's Web collection would provide a quick answer.</p> <p><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a></td> </tr> <tr> <td bgcolor="#FFFFFF" colspan="3" valign="top"><b>Federal Government Information Clearinghouse: GILS Initiative</b></td> </tr> <tr> <td bgcolor="#FFFFFF" valign="top"> </td> <td bgcolor="#FFFFFF" colspan="2" valign="top"> <p>The US Federal Government Information Clearinghouse is implementing a common standard for government information and services. The standard, the Global Information Locator Service, aims to make it easier for people to find information of all kinds, in all media, in all languages, and over time.</p> <p>The Clearinghouse lists the Internet Archive as a partner in its efforts. Some federal Clearinghouse "portals" that have already been built include the Government Printing Office access facility, the National Spatial Data Infrastructure Clearinghouse for Geospatial Data, and the National Biological Information Infrastructure Metadata Clearinghouse. 
Among those likely to be built soon are portals to the Department of Energy, the Library of Congress, NASA, the National Library of Medicine, the National Oceanic and Atmospheric Administration, the Patent and Trademark Office, and the US Geological Survey.</p> <p>More information on the Clearinghouse and the Global Information Locator Service:</p> <dir> <p><a href="https://web.archive.org/web/20011204000703/http://www.gils.net/AccessAmerica">A Partner’s Guide to the US Federal Government Information Clearinghouse</a></p> <p><a href="https://web.archive.org/web/20011204000703/http://www.gils.net/">Global Information Locator Service Web site</a></p> </dir> <p><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a> </td> </tr> </table> </div> </td> </tr> </table> </div> <h2><a name="FTP"></a>FTP Sites in the Archive</h2> <p> <table border="0" cellpadding="0" cellspacing="0" width="100%"> <tr> <td valign="top"> <div align="right"> <span class="smaller">DATES:</span></div> </td> <td valign="top"><img height="5" width="10" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td valign="top">July to October 1996</td> </tr> <tr> <td valign="top"> <div align="right"> <span class="smaller"><nobr>SIZE:</nobr></span></div> </td> <td></td> <td valign="top">0.05 terabyte (about 50,000 sites)</td> </tr> <tr> <td valign="top"> <div align="right"> <span class="smaller">ACCESS:</span></div> </td> <td></td> <td valign="top">See the Archive’s <a href="proposal.html">proposal form</a> and <a href="/web/20011204000703/http://archive.org/terms/index.html">terms of use</a></td> </tr> </table> <p><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of Page"></a></p> <h2><a name="Usenet"></a>Usenet Bulletin Boards in the Archive</h2> <p> <table border="0" cellpadding="0" 
cellspacing="0" width="100%"> <tr> <td valign="top"> <div align="right"> <span class="smaller">DATES:</span></div> </td> <td valign="top"><img height="5" width="10" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> <td valign="top">October 1996 to late 1998, 2000 to now</td> </tr> <tr> <td valign="top"> <div align="right"> <span class="smaller"><nobr>SIZE:</nobr></span></div> </td> <td></td> <td valign="top">0.5 terabyte (about 16 million postings)</td> </tr> <tr> <td valign="top"> <div align="right"> <span class="smaller">ACCESS:</span></div> </td> <td></td> <td valign="top">See the Archive’s <a href="proposal.html">proposal form</a> and <a href="/web/20011204000703/http://archive.org/terms/index.html">terms of use</a></td> </tr> </table> <p> <table border="0" cellpadding="0" cellspacing="0" width="100%"> <tr> <td colspan="2"><i><b>Find out<br> </b></i><img height="5" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> </tr> <tr> <td width="14" valign="top"><img height="10" width="10" src="/web/20011204000703im_/http://archive.org/images/bullet.gif"></td> <td width="100%" valign="top">How we <a href="about.html">acquire and store</a> the Internet collections</td> </tr> <tr> <td width="14" valign="top"><img height="10" width="10" src="/web/20011204000703im_/http://archive.org/images/bullet.gif"></td> <td width="100%" valign="top">How to propose a project for <a href="proposal.html">using the Internet collections</a></td> </tr> <tr> <td width="14" valign="top"><img height="10" width="10" src="/web/20011204000703im_/http://archive.org/images/bullet.gif"></td> <td width="100%" valign="top">About our <a href="/web/20011204000703/http://archive.org/contacts/index.html#Lists">announcement and discussion lists</a> on Internet libraries and movie archives</td> </tr> </table> <p><a href="#top"><img height="20" width="85" src="/web/20011204000703im_/http://archive.org/images/to_the_top.gif" border="0" vspace="6" alt="Top of 
Page"></a></p> <p><br> </td> <td valign="top"><img height="5" width="15" src="/web/20011204000703im_/http://archive.org/images/clear.gif"></td> </tr> </table> </td> </tr> </table> <table border="0" cellpadding="3" cellspacing="0" bgcolor="#999966" width="715"> <tr> <td> <div align="left"> <font color="#ffffcc"><span class="smaller"><csobj w="709" h="18" t="Component" csref="../../Archive.org%20Site.data/Components/footer.html" occur="0"><span class="smaller"><img height="5" width="13" src="/web/20011204000703im_/http://archive.org/images/clear.gif" border="0"><a href="/web/20011204000703/http://archive.org/terms/index.html#Privacy_Policy" style="COLOR: #ffffcc">Terms, Privacy, and Copyright 10Mar01</a> | <a href="/web/20011204000703/http://archive.org/contacts/index.html" style="COLOR: #ffffcc">Contact Us</a></span></csobj></span></font> <!-- tracker --> <font size="1" color="white"> <script><!-- an=navigator.appName;sr='https://web.archive.org/web/20011204000703/http://x3.extreme-dm.com/';srw="na";srb="na";d=document;r=41;function pr(n) { d.write("<img src=\""+sr+"n\/?tag=archive&p=http%3A%2F%2Fwww.archive.org%2Finternet%2Findex.html&j=y&srw="+srw+"&srb="+srb+"&l="+escape(d.referrer)+"&rs="+r+"\" height=1 width=1>");} // --> </script> <script language="javascript1.2"><!-- s=screen;srw=s.width;an!="Netscape"?srb=s.colorDepth:srb=s.pixelDepth // --> </script> <script><!-- pr() // --> </script> <noscript> <img src="https://web.archive.org/web/20011204000703im_/http://x3.extreme-dm.com/z/?tag=archive&p=http%3A%2F%2Fwww.archive.org%2Finternet%2Findex.html&j=n" height="1" width="1"> </noscript> </font><!-- end tracker --> </div> </td> </tr> </table> </center> </td> </tr> </table> </center> </body> </html> <!-- FILE ARCHIVED ON 00:07:03 Dec 04, 2001 AND RETRIEVED FROM THE INTERNET ARCHIVE ON 08:27:19 Nov 24, 2024. JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE. ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C. SECTION 108(a)(3)). 
--> <!-- playback timings (ms): captures_list: 0.661 exclusion.robots: 0.031 exclusion.robots.policy: 0.018 esindex: 0.011 cdx.remote: 45.899 LoadShardBlock: 54.137 (3) PetaboxLoader3.datanode: 99.501 (4) load_resource: 166.903 PetaboxLoader3.resolve: 82.998 -->