The Internet Archive, Trying to Encompass All Creation - The New York Times

<!DOCTYPE html> <!--[if (gt IE 9)|!(IE)]> <!--> <html lang="en" class="no-js has-comments blog-bits section-technology" itemscope="" xmlns:og="//"> <!--<![endif]--> <!--[if IE 9]> <html lang="en" class="no-js ie9 lt-ie10 has-comments blog-bits section-technology" xmlns:og="//"> <![endif]--> <!--[if IE 8]> <html lang="en" class="no-js ie8 lt-ie10 lt-ie9 has-comments blog-bits section-technology" xmlns:og="//"> <![endif]--> <!--[if (lt IE 8)]> <html lang="en" class="no-js lt-ie10 lt-ie9 lt-ie8 has-comments blog-bits section-technology" xmlns:og="//"> <![endif]--> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <meta name="communityAssetLabels" content="NYT_Blogs"> <meta name="communityAssetTaxonomy" content="blogs/bits/The Internet Archive, Trying to Encompass All Creation ( 20141031 )"> <meta name="communityAssetURL" content="//"> <meta name="PT" content="Blogs"> <meta name="PST" content="Blog Post"> <meta name="sourceApp" content="blogs"> <meta itemprop="identifier" name="blogpostid" content="100000003209345"> <meta name="author" content="David Streitfeld"> <meta name="CLMST" content="David Streitfeld"> <meta name="byl" content="David Streitfeld"> <meta name="article:author" content="//"> <meta name="GBLC" content="Internet"> <meta name="PUV" content="normal"> <meta property="article:published_time" itemprop="datePublished" content="2014-10-31T16:01:09-04:00"> <meta name="PUD" content="20141031"> <meta name="pdate" content="20141031"> <meta name="ptime" content="20141031160109"> <meta name="utime" content="20141103122631"> <meta property="article:modified_time" itemprop="dateModified" content="2014-11-03T12:26:31-05:00"> <meta name="DISPLAYDATE" content="October 31, 2014"> <meta name="dat" content="October 31, 2014"> <meta itemprop="articleSection" property="article:section" content="Technology"> <meta name="CG" content="Technology"> <meta name="SCG" content="bits"> <meta name="BN" content="bits"> <meta name="robots" content="noodp"> <meta name="per" 
content="Kahle, Brewster"> <meta name="des" content="Archives and Records;Computers and the Internet"> <meta name="org" content="Internet Archive"> <meta property="article:tag" content="Kahle Brewster"> <meta property="article:tag" content="Archives and Records"> <meta property="article:tag" content="Computers and the Internet"> <meta property="article:tag" content="Internet Archive"> <meta property="article:tag" content="Internet"> <meta name="keywords" content="Kahle Brewster,Archives and Records,Computers and the Internet,Internet Archive,Internet"> <meta name="news_keywords" content="Archive,Brewster Kahle,Computers and the Internet,Internet Archive"> <link rel="alternate" type="application/json+oembed" href="" title="The Internet Archive, Trying to Encompass All Creation"> <meta name="description" content="The Internet Archive has 12 million items in its digital collection. Under expansion plans announced this week, that is merely the beginning."> <meta name="hdl_p" content="Internet Archive Aims to Preserve All of Creation"> <meta name="hdl" content="The Internet Archive, Trying to Encompass All Creation"> <meta property="og:title" content="The Internet Archive, Trying to Encompass All Creation"> <meta property="og:type" content="article"> <meta property="og:url" content=""> <meta property="og:site_name" content="Bits Blog"> <meta property="og:description" content="The Internet Archive has 12 million items in its digital collection. Under expansion plans announced this week, that is merely the beginning."> <meta property="og:image" content=""> <meta name="thumbnail_150" content=""> <meta name="twitter:card" content="summary"> <meta name="twitter:title" content="The Internet Archive, Trying to Encompass All Creation"> <meta name="twitter:url" content=""> <meta name="twitter:description" content="The Internet Archive has 12 million items in its digital collection. 
Under expansion plans announced this week, that is merely the beginning."> <meta name="twitter:image" content=""> <meta itemprop="thumbnailUrl" content="" name="thumbnail"> <meta name="twitter:site" content="@nytimesbits"> <link rel="prev" title="The New Thing in Google Flu Trends Is Traditional Data" href=""> <link rel="next" title="Amazon, Unclear on Diversity" href=""> <link rel="canonical" href=""> <link rel="shortlink" href="//"> <meta name="adxPage" content=""> <meta property="article:collection" content=""> <meta name="dfp-ad-unit-path" content="technology/bits"> <title>The Internet Archive, Trying to Encompass All Creation - The New York Times</title> <script id="nyt-capsule-data" type="text/json"> { "lastTransform": "2022-06-07T22:10:13.522Z", "isCanonical": true, "isArticleOrInt": true, "isArticle": true, "isInt": false } </script> <script src="" type="text/javascript"></script> <!--[if (gt IE 9)|!(IE)]> <!--> <link rel="stylesheet" type="text/css" media="screen" href=""> <!--<![endif]--> <!--[if lte IE 9]> <link rel="stylesheet" type="text/css" media="screen" href="" /> <![endif]--> <link rel="stylesheet" type="text/css" media="screen" href=""> <link rel="stylesheet" type="text/css" media="print" href=""> <script> var NYT = NYT || {}, NYTD = NYTD || {}; </script> <script id="magnum-feature-flags" type="application/json"> ["limitFabrikSave", "facebookOauthButton", "videoVHSCover", "videoVHSShareTools", "videoVHSEmbeddedOnly", "removeInternationalEdition", "piiBlockDFP", "indexAsHeaderBidder", "caslOpt"] </script> <script> var require = { baseUrl: '', waitSeconds: 20, paths: { 'foundation': 'blogs/latest/js/foundation', 'blogs': 'blogs/latest/js/blogs', 'auth/mtr': '', 'auth/growl': '', } }; </script> <!--[if (gte IE 9)|!(IE)]> <!--> <script data-main="foundation/main" src=""></script> <!--<![endif]--> <!--[if lt IE 9]> <script> require.map = { '*': { 'foundation/main': 'foundation/legacy_main' } }; </script> <script data-main="foundation/legacy_main" src=""></script> 
<![endif]--> <script> require(['foundation/main'], function() { require(['auth/mtr']); }); </script> <!--esi <script id="user-info-data" type="application/json"> <esi:include src="/svc/web-products/userinfo-v3.json" /> </script> --> <link rel="stylesheet" type="text/css" href=""> <link rel="stylesheet" type="text/css" href=""> </head> <body class="single single-post postid-165351 blog-universal nyt-blog-bits" id="" data-env="production" data-region="ewr1"> <div id="shell" class="shell"> <header id="masthead" class="masthead masthead-theme-standard" role="banner"> <div class="container"> <div class="quick-navigation button-group"> <button class="button sections-button"><i class="icon sprite-icon"></i><span class="button-text">Sections</span></button> <button class="button home-button" data-href="" title="Go to the home page to see the latest top stories."><i class="icon sprite-icon"></i> <span class="button-text">Home</span> </button> <button class="button search-button"><i class="icon sprite-icon"></i><span class="button-text">Search</span></button> <a class="button skip-button skip-to-content visually-hidden focusable" href="#main">Skip to content</a> </div> <!-- close button-group --> <div class="branding"> <h2 class="branding-heading"> <a id="branding-heading-link" href=""> <span class="visually-hidden">The New York Times</span> </a> </h2> <script> window.magnum.writeLogo('small', '', 'Technology', 'masthead-theme-standard', 'standard', 'branding-heading-link', 'blogs'); </script> </div> <!-- close branding --> <div class="story-meta"> <h6 class="kicker"> <span class="kicker-label"> <a href=""> Bits </a> </span> <span class="pipe">|</span> The Internet Archive, Trying to Encompass All Creation </h6> </div> <!-- close story-meta --> <!-- close user-tools --> </div> <!-- close container --> <div class="search-flyout-panel flyout-panel"> <button class="button close-button" type="button"><i class="icon"></i><span class="visually-hidden">Close search</span></button> 
<nav class="search-form-control form-control layout-horizontal"> <h2 class="visually-hidden">Site Search Navigation</h2> <form class="search-form" role="search"> <div class="control"> <div class="label-container visually-hidden"> <label for="search-input">Search</label> </div> <div class="field-container"> <input id="search-input" name="search-input" type="text" class="search-input text" autocomplete="off" placeholder="Search"> <button type="button" class="button clear-button" tabindex="-1" aria-describedby="clear-search-input"><i class="icon"></i><span id="clear-search-input" class="visually-hidden">Clear this text input</span></button> <div class="auto-suggest" style="display: none;"> <ol></ol> </div> <button class="button submit-button" type="submit">Go</button> </div> </div> <!-- close control --> </form> </nav> </div> <!-- close flyout-panel --> <div id="notification-modals" class="notification-modals"></div> </header> <nav id="ribbon" class="ribbon ribbon-start nocontent robots-nocontent" aria-hidden="true"> <div class="nocontent robots-nocontent"> <ol class="ribbon-menu"> <li class="collection ribbon-loader"> <div class="loader "><span class="visually-hidden">Loading...</span></div> </li> </ol> <div class="ribbon-navigation-container"> <nav class="ribbon-navigation next"> <span class="visually-hidden">See next articles</span> <div class="arrow arrow-right"> <div class="arrow-conceal"></div> </div> </nav> <nav class="ribbon-navigation previous"> <span class="visually-hidden">See previous articles</span> <div class="arrow arrow-left"> <div class="arrow-conceal"></div> </div> </nav> </div> </div> <!-- close nocontent --> </nav> <nav id="navigation" class="navigation"> <h2 class="visually-hidden">Site Navigation</h2> </nav> <!-- close navigation --> <nav id="mobile-navigation" class="mobile-navigation hidden"> <h2 class="visually-hidden">Site Mobile Navigation</h2> </nav> <!-- close mobile-navigation --> <div id="navigation-edge" class="navigation-edge"></div> 
<main id="main" class="main" role="main"> <div id="story" class="story theme-main"> <div id="bits" class="spanAC blog"> <header id="blogs-masthead" class="wrap"> <div class="ad sponsortile-ad hidden nocontent robots-nocontent"> <div class="caption">Supported by</div> </div> <div class="element1"> <a href=";action=Click&amp;region=Header&amp;pgtype=Blogs&amp;version=Blog%20Post&amp;contentCollection=Technology"> <div id="logo" class="archived-blog-post"> <div class="w75 left site-header-logo"><img src="" alt="Bits"></div> <div class="site-title">Bits</div> <div class="site-description">Business, Innovation, Technology, Society</div> </div> </a> </div> <div class="element2"> <div class="inlineSearchControl"> <form method="get" id="searchform" action="//"> <input type="text" value="" name="s" id="s" class="text" placeholder="Search Bits"> <button type="submit" class="searchsubmit">Search</button> </form> </div> </div> <div class="clear"></div> </header> <div class="a-column " id="a-col"> <div id="content" class="hfeed"> <article class="post-165351 post type-post status-publish hentry category-internet tag-archives-and-records tag-computers-and-the-internet tag-internet-archive tag-kahle-brewster per-kahle-brewster des-archives-and-records des-computers-and-the-internet org-internet-archive news_keywords-archive news_keywords-brewster-kahle news_keywords-computers-and-the-internet news_keywords-internet-archive splitPostView" id="post-165351"> <header class="postHeader"> <div class="postKicker"> <span class="kicker"><a href=";version=Blog%20Post&amp;action=Click&amp;contentCollection=Technology&amp;pgtype=Blogs&amp;region=Header">Internet</a></span> </div> <h1 itemprop="headline" class="entry-title">The Internet Archive, Trying to Encompass All Creation</h1> <div class="story-meta-footer"> <address class="byline author vcard" itemprop="author creator" itemtype="//"> By David Streitfeld </address> <!-- date published --><time class="dateline " 
datetime="2014-10-31T20:01:09+00:00" itemprop="datePublished">October 31, 2014 4:01 pm</time> <!-- date updated --><span class="visually-hidden updated">October 31, 2014 4:01 pm</span><button class="button comments-button theme-kicker" data-skip-to-para-id=""> </button> </div> </header> <div class="entry-content"> <div class="w592"> <figure class="media photo promo" itemprop="associatedMedia" itemscope="" itemid="" itemtype="//" role="group"><span class="visually-hidden">Photo</span> <div class="image"><img itemprop="url" itemid="" src="" id="100000003208290" width="592" height="427" alt="The Internet Archive occupies a converted church in San Francisco&#x2019;s Richmond District."> <meta itemprop="height" content="427"> <meta itemprop="width" content="592"> </div> <figcaption class="caption" itemprop="description"><span class="caption-text">The Internet Archive occupies a converted church in San Francisco&#x2019;s Richmond District.</span><span class="credit" itemprop="copyrightHolder"><span class="visually-hidden">Credit</span> David Rinehart</span> </figcaption> </figure> </div> <div id="sharetools-story" class="sharetools theme-classic sharetools-story" data-shares="email|Email,facebook|Share,twitter|Tweet,save|Save,show-all|More,ad" data-url="//" data-title="The Internet Archive, Trying to Encompass All Creation" data-description="The Internet Archive has 12 million items in its digital collection. Under expansion plans announced this week, that is merely the beginning."> </div> <p class="story-body-text" itemprop="articleBody">Brewster Kahle is a librarian by training and temperament. In the mid-1990s, when many saw the nascent World Wide Web as a place to sell things, he saw it as data that cried out to be preserved and cataloged. Later, he widened his scope to include material &#x2014; film, books, music &#x2014; that was not native to the web but could be digitally gathered there.</p> <p class="story-body-text" itemprop="articleBody">By most standards, Mr. 
Kahle has been pretty successful. <a href="">The Internet Archive</a> serves from two to three million visitors a day with such tools as the Wayback Machine, which provides snapshots of 435 billion Web pages saved over time. The archive has seven million texts (you could call them books), 2.1 million audio recordings, and 1.8 million videos. It is an immense library.</p> <p class="story-body-text" itemprop="articleBody">Mr. Kahle has even bigger dreams, however. With a limited staff, the archive can conserve only so much. But if anyone can become a curator, the archive may one day resemble one of those Borgesian fantasies of the Total Library, a place that not only collects the world but becomes it.</p> <p class="story-body-text" itemprop="articleBody">&#x201C;We thought the machines were going to save us &#x2014; crawling the web, digitizing the books, organizing the information &#x2014; but we were wrong,&#x201D; Mr. Kahle said. &#x201C;Communities of people are at the heart of curation.&#x201D;</p> <p class="story-body-text" itemprop="articleBody">At an event Tuesday night at the converted San Francisco church that serves as the archive&#x2019;s headquarters, the nonprofit&#x2019;s staff showed off exactly how it and communities are going to be &#x201C;building libraries together,&#x201D; as the catchphrase of the evening had it. An ample crowd was in attendance, despite San Francisco battling it out in the World Series at that very moment.</p> <p class="story-body-text" itemprop="articleBody">A new book scanner was presented; Robert Miller, the archive&#x2019;s director of books, literally unveiled it. This baby was only 40 inches tall and 62 pounds, versus the earlier version&#x2019;s six feet and 350 pounds. In other words, it is portable, and can be taken to collections that are too fragile or cumbersome to make their own way to the archive. 
It&#x2019;s much easier to use, too.</p> <p class="story-body-text" itemprop="articleBody">Television is getting increasing emphasis at the archive, with hundreds of thousands of news reports that are searchable. Now, it is focusing on political ads as well.</p> <p class="story-body-text" itemprop="articleBody">Roger Macdonald, director of the Television Archive, introduced the <a href="">Philly Political Media Watch Project</a>, a pilot program by the archive, the Sunlight Foundation, Philadelphia&#x2019;s Committee of Seventy (a government watchdog), the Linguistic Data Consortium at the University of Pennsylvania, and the University of Delaware&#x2019;s Center for Community Research and Service.</p> <p class="story-body-text" itemprop="articleBody">The notion here is that political ads fly by on television, giving the viewer little context in which to judge them. Like the ad shown to the archive crowd where a Philadelphia candidate accused his opponent of burning down a church. Trumped-up charge or heinous act? No one had a clue.</p> <p class="story-body-text" itemprop="articleBody">The project is experimenting with creating a resource library of political ads and the TV news broadcasts in which they appeared. The goal is to help journalists and researchers compare the ads from a single sponsor or group, see who is paying for them and, ultimately, try to determine how well the local news media are covering the issues behind the ads. If the pilot works out in Philadelphia, it will unroll nationally in time for the 2016 elections.</p> <p class="story-body-text" itemprop="articleBody">&#x201C;This is a modest step to providing individual citizens with the information they need,&#x201D; Mr. Macdonald said.</p> <p class="story-body-text" itemprop="articleBody"><a href="">A third area of focus is music</a>, which is expanding on previous efforts. 
In a throwback of sorts to an earlier era, a prototype listening room has been set up at the archive for physical patrons.</p> <p class="story-body-text" itemprop="articleBody">&#x201C;Preserving music is now urgent as tapes are disintegrating, disc formats are used less, and new web-only music has blossomed,&#x201D; Mr. Kahle said. &#x201C;We are now working with top archives, collectors and labels to preserve our treasures.&#x201D; Among them: the <a title="The site." href="">ARChive of Contemporary Music</a> and <a href="">Musica Omnia</a>, a label specializing in classical music.</p> <p class="story-body-text" itemprop="articleBody">Yet another new archive effort involves the website. The existing site &#x201C;kind of looks like it&#x2019;s a 10-year-old website, because it&#x2019;s a 10-year-old website,&#x201D; Mr. Kahle explained. A <a href="" title="The site.">new site is in beta</a>. It prominently encourages users to use the site to assemble their own virtual libraries of material, which they can return to when they want to hear, for instance, Cuban jazz from the 1940s or read &#x201C;Constitutional Thought in Sixteenth-Century France: A Study in the Evolution of Ideas.&#x201D;</p> <p class="story-body-text" itemprop="articleBody">&#x201C;It turns out people want old stuff, which I think is a good thing,&#x201D; Mr. Kahle said.</p> <h4>Related Coverage:</h4> <div class="nyt-article-promo"> <h4><a href="">In a Flood Tide of Digital Data, an Ark Full of Books</a></h4> <p class="nyt-article-byline">By DAVID STREITFELD</p> <p class="nyt-article-summary">As society embraces all forms of digital entertainment, a latter-day Noah is looking the other way. 
Brewster Kahle, who runs the Internet Archive, a nonprofit, hopes to collect one copy of every book.</p> </div> <footer class="story-footer"> <div class="story-meta"> <p class="story-print-citation">A version of this article appears in print on 11/03/2014, on page <span itemprop="printSection">B</span><span itemprop="printPage">6</span> of the <span itemprop="printEdition">New York</span> edition with the headline: Internet Archive Aims to Preserve All of Creation.</p> </div> </footer> </div> <footer class="entry-footer"> <button class="button comments-button theme-speech-bubble"></button> <div id="sharetools-post-footer" class="sharetools theme-classic" data-shares="show-all|Share" data-url="//" data-title="The Internet Archive, Trying to Encompass All Creation" data-description="The Internet Archive has 12 million items in its digital collection. Under expansion plans announced this week, that is merely the beginning."> </div> <div class="footer-tags"> </div> </footer> </article> <section id="whats-next" class="whats-next nocontent robots-nocontent"> <h2 class="visually-hidden">What&apos;s Next</h2> <div class="nocontent robots-nocontent"> <div class="loader-container"> <div class="loader loader-t-logo-32x32-ecedeb-ffffff"><span class="visually-hidden">Loading...</span></div> </div> </div> <!-- close nocontent --> </section> </div> <!-- end content --> </div> <!-- end .a-column --> <div class="cColumn nocontent robots-nocontent"> <aside> <div class="postNavigation"> <ul class="opposingFloatControl wrap"> <li class="element1"> <span class="previous">Previous Post</span> <div class="arrow arrow-left"> <div class="arrow-conceal"></div> </div> <a class="postTitle" href="" title="The New Thing in Google Flu Trends Is Traditional Data"> The New Thing in Google Flu Trends Is Traditional Data </a> </li> <li class="element2"> <span class="next">Next Post</span> <div class="arrow arrow-right"> <div class="arrow-conceal"></div> </div> <a class="postTitle" href="" 
title="Amazon, Unclear on Diversity"> Amazon, Unclear on Diversity </a> </li> </ul> </div> <div id="nyt_promo_widget-10" class="module nocontent widget_nyt_promo_widget"> <div style="font-family: nyt-franklin, arial, sans-serif; padding-top: 12px; font-size: 14px; line-height: 17px;"> Visit the <a href="" style="font-weight: 600;">Technology section</a> for complete coverage of the industry. <a href="" style="font-family: nyt-franklin, arial, sans-serif; font-weight: 600;">&#xBB;</a> </div> </div> <div id="nyt_blog_social_media-5" class="module nocontent widget_nyt_blog_social_media"> <!-- begin #blog-follow-widget --> <div id="blog-follow-widget" class="blog-fw-single "> <h4 class=""> <span class="blog-fw-generic-followmessage">Follow <span class="blog-fw-desk">us on</span></span> <span class="blog-fw-specific-followmessage"><span class="blog-fw-username"><a href="" title="Follow @nytimesbits on twitter">@nytimesbits</a></span> on <span class="blog-fw-twitter">twitter</span></span> </h4> <ul> <li class="blog-fw-twitter single"><a href="" title="Follow nytimesbits on Twitter">Twitter</a></li> </ul> <p class="blog-fw-followbutton"><a href="" title="Follow @nytimesbits on twitter">Follow</a></p> </div> <!-- end #blog-follow-widget --> </div> </aside> </div> <div class="clear"></div> </div> <!-- end blog wrap --> <div class="hideContent"></div> <div id="SponLink" class="text-ad bottom-left-ad nocontent robots-nocontent"></div> </div> <div class="search-overlay"></div> </main> <!-- close main --> <footer id="page-footer" class="page-footer" role="contentinfo"> <nav> <ul> <li> <a href="" itemprop="copyrightNotice"> &#xA9; <span itemprop="copyrightYear">2017</span> <span itemprop="copyrightHolder provider sourceOrganization" itemscope="" itemtype="//" itemid="//"> <span itemprop="name"> The New York Times Company</span> <meta itemprop="tickerSymbol" content="NYSE NYT"> </span> </a> </li> <li><a href="">Contact Us</a></li> <li><a href="">Work With Us</a></li> <li><a 
href="">Advertise</a></li> <li><a href="">Your Ad Choices</a></li> <li><a href="">Privacy</a></li> <li><a href="" itemprop="usageTerms">Terms of Service</a></li> <li class="last-item"><a href="">Terms of Sale</a></li> </ul> </nav> <nav class="last-nav"> <ul> <li><a href="">Site Map</a></li> <li><a href="">Help</a></li> <li><a href="">Site Feedback</a></li> <li class="last-item"><a href="">Subscriptions</a></li> </ul> </nav> </footer> </div> <!-- close shell --> <script> require(['foundation/main'], function() { require(['blogs/main']); require(['']); // Blacklist some trackers imported from nyt5 legacy libraries $(document).ajaxSend(function(event, jqXHR, settings) { if (settings.url.indexOf && settings.url.indexOf('amazon-adsystem') !== -1) { jqXHR.abort(); } }); }); </script> <script src=""></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10