CINXE.COM
The Whelming › The Reference Wars
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en-GB"> <head profile="http://gmpg.org/xfn/11"> <title>The Whelming › The Reference Wars</title> <meta http-equiv="content-type" content="text/html; charset=UTF-8" /> <link rel="stylesheet" type="text/css" media="screen,projection" href="http://magnusmanske.de/wordpress/wp-content/themes/veryplaintxt/style.css" title="veryplaintxt" /> <link rel="stylesheet" type="text/css" media="print" href="http://magnusmanske.de/wordpress/wp-content/themes/veryplaintxt/print.css" /> <link rel="alternate" type="application/rss+xml" href="http://magnusmanske.de/wordpress/feed" title="The Whelming RSS feed" /> <link rel="alternate" type="application/rss+xml" href="http://magnusmanske.de/wordpress/comments/feed" title="The Whelming comments RSS feed" /> <link rel="pingback" href="http://magnusmanske.de/wordpress/xmlrpc.php" /> <meta name='robots' content='max-image-preview:large' /> <style>img:is([sizes="auto" i], [sizes^="auto," i]) { contain-intrinsic-size: 3000px 1500px }</style> <link rel="alternate" type="application/rss+xml" title="The Whelming » The Reference Wars Comments Feed" href="http://magnusmanske.de/wordpress/archives/378/feed" /> <script type="text/javascript"> /* <![CDATA[ */ window._wpemojiSettings = {"baseUrl":"https:\/\/s.w.org\/images\/core\/emoji\/15.0.3\/72x72\/","ext":".png","svgUrl":"https:\/\/s.w.org\/images\/core\/emoji\/15.0.3\/svg\/","svgExt":".svg","source":{"concatemoji":"http:\/\/magnusmanske.de\/wordpress\/wp-includes\/js\/wp-emoji-release.min.js?ver=6.7.1"}}; /*! 
This file is auto-generated */ !function(i,n){var o,s,e;function c(e){try{var t={supportTests:e,timestamp:(new Date).valueOf()};sessionStorage.setItem(o,JSON.stringify(t))}catch(e){}}function p(e,t,n){e.clearRect(0,0,e.canvas.width,e.canvas.height),e.fillText(t,0,0);var t=new Uint32Array(e.getImageData(0,0,e.canvas.width,e.canvas.height).data),r=(e.clearRect(0,0,e.canvas.width,e.canvas.height),e.fillText(n,0,0),new Uint32Array(e.getImageData(0,0,e.canvas.width,e.canvas.height).data));return t.every(function(e,t){return e===r[t]})}function u(e,t,n){switch(t){case"flag":return n(e,"\ud83c\udff3\ufe0f\u200d\u26a7\ufe0f","\ud83c\udff3\ufe0f\u200b\u26a7\ufe0f")?!1:!n(e,"\ud83c\uddfa\ud83c\uddf3","\ud83c\uddfa\u200b\ud83c\uddf3")&&!n(e,"\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f","\ud83c\udff4\u200b\udb40\udc67\u200b\udb40\udc62\u200b\udb40\udc65\u200b\udb40\udc6e\u200b\udb40\udc67\u200b\udb40\udc7f");case"emoji":return!n(e,"\ud83d\udc26\u200d\u2b1b","\ud83d\udc26\u200b\u2b1b")}return!1}function f(e,t,n){var r="undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?new OffscreenCanvas(300,150):i.createElement("canvas"),a=r.getContext("2d",{willReadFrequently:!0}),o=(a.textBaseline="top",a.font="600 32px Arial",{});return e.forEach(function(e){o[e]=t(a,e,n)}),o}function t(e){var t=i.createElement("script");t.src=e,t.defer=!0,i.head.appendChild(t)}"undefined"!=typeof Promise&&(o="wpEmojiSettingsSupports",s=["flag","emoji"],n.supports={everything:!0,everythingExceptFlag:!0},e=new Promise(function(e){i.addEventListener("DOMContentLoaded",e,{once:!0})}),new Promise(function(t){var n=function(){try{var e=JSON.parse(sessionStorage.getItem(o));if("object"==typeof e&&"number"==typeof e.timestamp&&(new Date).valueOf()<e.timestamp+604800&&"object"==typeof e.supportTests)return e.supportTests}catch(e){}return null}();if(!n){if("undefined"!=typeof Worker&&"undefined"!=typeof OffscreenCanvas&&"undefined"!=typeof 
URL&&URL.createObjectURL&&"undefined"!=typeof Blob)try{var e="postMessage("+f.toString()+"("+[JSON.stringify(s),u.toString(),p.toString()].join(",")+"));",r=new Blob([e],{type:"text/javascript"}),a=new Worker(URL.createObjectURL(r),{name:"wpTestEmojiSupports"});return void(a.onmessage=function(e){c(n=e.data),a.terminate(),t(n)})}catch(e){}c(n=f(s,u,p))}t(n)}).then(function(e){for(var t in e)n.supports[t]=e[t],n.supports.everything=n.supports.everything&&n.supports[t],"flag"!==t&&(n.supports.everythingExceptFlag=n.supports.everythingExceptFlag&&n.supports[t]);n.supports.everythingExceptFlag=n.supports.everythingExceptFlag&&!n.supports.flag,n.DOMReady=!1,n.readyCallback=function(){n.DOMReady=!0}}).then(function(){return e}).then(function(){var e;n.supports.everything||(n.readyCallback(),(e=n.source||{}).concatemoji?t(e.concatemoji):e.wpemoji&&e.twemoji&&(t(e.twemoji),t(e.wpemoji)))}))}((window,document),window._wpemojiSettings); /* ]]> */ </script> <style id='wp-emoji-styles-inline-css' type='text/css'> img.wp-smiley, img.emoji { display: inline !important; border: none !important; box-shadow: none !important; height: 1em !important; width: 1em !important; margin: 0 0.07em !important; vertical-align: -0.1em !important; background: none !important; padding: 0 !important; } </style> <style id='activitypub-followers-style-inline-css' type='text/css'> .activitypub-follower-block.is-style-compact .activitypub-handle,.activitypub-follower-block.is-style-compact .sep{display:none}.activitypub-follower-block.is-style-with-lines ul li{border-bottom:.5px solid;margin-bottom:.5rem;padding-bottom:.5rem}.activitypub-follower-block.is-style-with-lines ul li:last-child{border-bottom:none}.activitypub-follower-block.is-style-with-lines .activitypub-handle,.activitypub-follower-block.is-style-with-lines .activitypub-name{text-decoration:none}.activitypub-follower-block.is-style-with-lines .activitypub-handle:hover,.activitypub-follower-block.is-style-with-lines 
.activitypub-name:hover{text-decoration:underline}.activitypub-follower-block ul{margin:0!important;padding:0!important}.activitypub-follower-block li{display:flex;margin-bottom:1rem}.activitypub-follower-block img{border-radius:50%;height:40px;margin-right:var(--wp--preset--spacing--20,.5rem);width:40px}.activitypub-follower-block .activitypub-link{align-items:center;color:inherit!important;display:flex;flex-flow:row nowrap;max-width:100%;text-decoration:none!important}.activitypub-follower-block .activitypub-handle,.activitypub-follower-block .activitypub-name{text-decoration:underline;text-decoration-thickness:.8px;text-underline-position:under}.activitypub-follower-block .activitypub-handle:hover,.activitypub-follower-block .activitypub-name:hover{text-decoration:none}.activitypub-follower-block .activitypub-name{font-size:var(--wp--preset--font-size--normal,16px)}.activitypub-follower-block .activitypub-actor{font-size:var(--wp--preset--font-size--small,13px);overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.activitypub-follower-block .sep{padding:0 .2rem}.activitypub-follower-block .wp-block-query-pagination{margin-top:1.5rem}.activitypub-follower-block .activitypub-pager{cursor:default}.activitypub-follower-block .activitypub-pager.current{opacity:.33}.activitypub-follower-block .page-numbers{padding:0 .2rem}.activitypub-follower-block .page-numbers.current{font-weight:700;opacity:1} </style> <style id='activitypub-follow-me-style-inline-css' type='text/css'> .activitypub__modal.components-modal__frame{background-color:#f7f7f7;color:#333}.activitypub__modal.components-modal__frame .components-modal__header-heading,.activitypub__modal.components-modal__frame h4{color:#333;letter-spacing:inherit;word-spacing:inherit}.activitypub__modal.components-modal__frame .components-modal__header .components-button:hover{color:var(--wp--preset--color--white)}.activitypub__dialog{max-width:40em}.activitypub__dialog h4{line-height:1;margin:0}.activitypub__dialog 
.activitypub-dialog__section{margin-bottom:2em}.activitypub__dialog .activitypub-dialog__remember{margin-top:1em}.activitypub__dialog .activitypub-dialog__description{font-size:var(--wp--preset--font-size--normal,.75rem);margin:.33em 0 1em}.activitypub__dialog .activitypub-dialog__button-group{align-items:flex-end;display:flex;justify-content:flex-end}.activitypub__dialog .activitypub-dialog__button-group svg{height:21px;margin-right:.5em;width:21px}.activitypub__dialog .activitypub-dialog__button-group input{background-color:var(--wp--preset--color--white);border-radius:50px 0 0 50px;border-width:1px;border:1px solid var(--wp--preset--color--black);color:var(--wp--preset--color--black);flex:1;font-size:16px;height:inherit;line-height:1;margin-right:0;padding:15px 23px}.activitypub__dialog .activitypub-dialog__button-group button{align-self:center;background-color:var(--wp--preset--color--black);border-radius:0 50px 50px 0;border-width:1px;color:var(--wp--preset--color--white);font-size:16px;height:inherit;line-height:1;margin-left:0;padding:15px 23px;text-decoration:none}.activitypub__dialog .activitypub-dialog__button-group button:hover{border:inherit}.activitypub-follow-me-block-wrapper{width:100%}.activitypub-follow-me-block-wrapper.has-background .activitypub-profile,.activitypub-follow-me-block-wrapper.has-border-color .activitypub-profile{padding-left:1rem;padding-right:1rem}.activitypub-follow-me-block-wrapper .activitypub-profile{align-items:center;display:flex;padding:1rem 0}.activitypub-follow-me-block-wrapper .activitypub-profile .activitypub-profile__avatar{border-radius:50%;height:75px;margin-right:1rem;width:75px}.activitypub-follow-me-block-wrapper .activitypub-profile .activitypub-profile__content{flex:1;min-width:0}.activitypub-follow-me-block-wrapper .activitypub-profile .activitypub-profile__handle,.activitypub-follow-me-block-wrapper .activitypub-profile 
.activitypub-profile__name{line-height:1.2;margin:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.activitypub-follow-me-block-wrapper .activitypub-profile .activitypub-profile__name{font-size:1.25em}.activitypub-follow-me-block-wrapper .activitypub-profile .activitypub-profile__follow{align-self:center;background-color:var(--wp--preset--color--black);color:var(--wp--preset--color--white);margin-left:1rem} </style> <link rel='stylesheet' id='wp-components-css' href='http://magnusmanske.de/wordpress/wp-includes/css/dist/components/style.min.css?ver=6.7.1' type='text/css' media='all' /> <link rel="https://api.w.org/" href="http://magnusmanske.de/wordpress/wp-json/" /><link rel="alternate" title="JSON" type="application/json" href="http://magnusmanske.de/wordpress/wp-json/wp/v2/posts/378" /><link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://magnusmanske.de/wordpress/xmlrpc.php?rsd" /> <meta name="generator" content="WordPress 6.7.1" /> <link rel="canonical" href="http://magnusmanske.de/wordpress/archives/378" /> <link rel='shortlink' href='http://magnusmanske.de/wordpress/?p=378' /> <link rel="alternate" title="oEmbed (JSON)" type="application/json+oembed" href="http://magnusmanske.de/wordpress/wp-json/oembed/1.0/embed?url=http%3A%2F%2Fmagnusmanske.de%2Fwordpress%2Farchives%2F378" /> <link rel="alternate" title="oEmbed (XML)" type="text/xml+oembed" href="http://magnusmanske.de/wordpress/wp-json/oembed/1.0/embed?url=http%3A%2F%2Fmagnusmanske.de%2Fwordpress%2Farchives%2F378&format=xml" /> <meta property="fediverse:creator" name="fediverse:creator" content="adminimum@magnusmanske.de" /> <style type="text/css" media="all"> /*<![CDATA[*/ /* CSS inserted by theme options */ body{font-family:verdana,geneva,sans-serif;font-size:90%;} body div#container { float: left; margin: 0 -200px 2em 0; } body div#content { margin: 3em 200px 0 0; } body div.sidebar { float: right; } body div#content div.hentry{text-align:justify;} body div#content 
h2,div#content h3,div#content h4,div#content h5,div#content h6{font-family:arial,helvetica,sans-serif;} body div#wrapper{max-width:55em;min-width:35em;width:80%;} body div.sidebar{text-align:center;} /*]]>*/ </style> <link rel="alternate" title="ActivityPub (JSON)" type="application/activity+json" href="http://magnusmanske.de/wordpress/archives/378" /> </head> <body class="wordpress y2024 m11 d28 h11 single s-y2016 s-m01 s-d06 s-h06 s-category-wikidata s-category-wikimedia-2 s-author-adminimum"> <div id="wrapper"> <div id="header"> <h1 id="blog-title"><a href="http://magnusmanske.de/wordpress/" title="The Whelming">The Whelming</a></h1> <div id="blog-description">Tech, tools, and tribulations</div> </div><!-- #header --> <div class="access"><span class="content-access"><a href="#content" title="Skip to content">Skip to content</a></span></div> <div id="globalnav"><ul id="menu"><li class="page_item_home home-link"><a href="http://magnusmanske.de/wordpress/" title="The Whelming" rel="home">Home</a></li><li class="page_item page-item-2"><a href="http://magnusmanske.de/wordpress/about">About</a></li></ul></div> <div id="container"> <div id="content" class="hfeed"> <div id="post-378" class="hentry p1 post publish author-adminimum category-wikidata category-wikimedia-2 y2016 m01 d06 h06"> <h2 class="entry-title">The Reference Wars</h2> <div class="entry-content"> <p>In a recent <a href="https://en.wikipedia.org/wiki/Wikipedia:Wikipedia_Signpost/2015-12-02/Op-ed">Wikipedia Signpost Op-Ed</a>, Andreas Kolbe wrote about Wikidata and references. He comes to the conclusion that Wikidata needs more (non-Wikipedia) references, a statement I wholeheartedly agree with. 
He also divines that this will never happen, that Wikidata is doomed, while at the same time somehow being controlled by Google and Microsoft; I will not comment on these “conclusions”, as others have already done so elsewhere.</p> <p>Andreas also uses my own <a href="https://tools.wmflabs.org/wikidata-todo/stats.php?reverse">Wikidata statistics</a> to make his point about missing references on Wikidata. The numbers I show are useful, IMHO, to show the remarkable progress of Wikidata, but they are much too crude to draw conclusions about the state of references there. Also, the impression I get from Andreas’ text is that, while Wikipedia has some issues, references are basically OK, whereas they are essentially non-existent in Wikidata.</p> <p>So I thought I’d have a look at some actual numbers, especially comparing Wikipedia and Wikidata in terms of references.</p> <p>One key issue is that there is no built-in way to get metrics about statements and references from Wikipedia. I therefore developed my own approach. Given a Wikipedia article, I use the <a href="https://rest.wikimedia.org/">REST API</a> to get HTML for the article. I then count the number of reference <em>uses</em> (essentially, &lt;ref&gt; tags) in the article; note that this number is larger than (or at least equal to) the number of references at the bottom of the page. Then, I strip the HTML tags, and count the number of sentences (starts with an upper-case character, has at least 50 characters, ends with a “.”); the numbers were confirmed manually for a few example articles through other sentence counting tools on the web, and yielded similar results. I then assume that each sentence in the article contains one statement (or fact); in reality, there are likely many such statements (such as the first sentence of a biographical article), but I am aiming for a lower boundary here. (Any sentence <em>not</em> containing a statement/fact should be deleted from Wikipedia anyway.) 
A useful metric derived from both the number of reference uses and the number of statements (=sentences) is the references-per-statement (RPS) ratio.</p> <p>For Wikidata, a similar metric can be calculated. For practical purposes, I skip statements of the “string” type, as they are mostly external references in themselves (e.g. VIAF identifiers); I also skip “media”-type statements, as they should have “references” in their file description page on Commons. For references, I do <em>not</em> count “imported from Wikipedia”, as these are not “real” references, but rather placeholders for future improvement. Again, an RPS ratio can be computed.</p> <p>I then calculated these ratios for 4,683 Featured Articles from English Wikipedia and their associated Wikidata items (<a href="https://docs.google.com/spreadsheets/d/17400b7rzUx-jxCRR2sXma7ofG782ePe_jUCHDS2s7zo/edit?usp=sharing">data</a>). As these articles have been significantly worked over and approved by the English Wikipedia community, they should represent the “best case scenario” for Wikipedia.</p> <p>Indeed, the RPS ratio is higher for Wikipedia in 87% of cases, which would mean that Wikipedia is better referenced than Wikidata. But keep in mind that this represents the <a href="https://www.youtube.com/watch?v=OXRi28W-ENY">best of the best of the best</a> of English Wikipedia articles, fifteen years in the making, compared to a three-and-a-half-year-old Wikidata (and references were not supported for the first year or so). This is as good as it gets for Wikipedia, and still, Wikidata has a better RPS in about 13% of cases.</p> <p>Even more interesting IMHO: Taking the mean of both the number of statements and the number of references for both Wikipedia and Wikidata, respectively, and calculating the RPS ratios for those means, yields 0.32 for Wikipedia and 0.15 for Wikidata. This seems counter-intuitive, given the previous 87/13 “ratio of ratios”. 
However, further investigation shows that only 1,305 (~28%) of Wikidata items have any references at all, but where there are references, they usually outshine Wikipedia; about half of the items with at least one reference have a better RPS ratio than the respective Wikipedia article. This seems to indicate a “care factor” at work; where someone cared about adding references to the item, it was done quite well. Wikidata RPS ratios range up to 1.5, meaning two statements are, on average, supported by three references, whereas Wikipedia reaches “peak RPS ratio” at 0.93, or slightly less than one reference per statement.</p> <p>I believe these numbers show that Wikidata can equal and surpass Wikipedia in terms of “referencedness”, but it is a function of attention to the items. Which in turn is a matter of man- and bot-hours spent. Indeed, for the Wikidata showcase items (the equivalent of Featured Articles on Wikipedia), the Wikidata RPS ratio is better than that of the associated English Wikipedia article in 19 out of 24 cases (~80%).</p> <p>So will Wikidata ever catch up to Wikipedia in terms of RPS ratio? I think so. The ability of Wikidata to be reliably edited by a machine allows for improvement by automated and semi-automated bots, tools, games, on-wiki gadgets, etc., which allow for a much steeper editing rate, as I <a href="http://magnusmanske.de/wordpress/?p=276">demonstrated previously</a> for images, where Wikidata went from nothing to second place in about two years, and is now angling for the pole position (~1.1M images at the moment). I see no reason to doubt this will happen to references as well.</p> </div> <div class="entry-meta"> This was written by <span class="vcard"><span class="fn n">Magnus</span></span>. Posted on <abbr class="published" title="2016-01-06T17:50:36+0100">Wednesday, January 6, 2016, at 17:50</abbr>. 
Filed under <a href="http://magnusmanske.de/wordpress/archives/category/wikidata" rel="category tag">Wikidata</a>, <a href="http://magnusmanske.de/wordpress/archives/category/wikimedia-2" rel="category tag">Wikimedia</a>. Bookmark the <a href="http://magnusmanske.de/wordpress/archives/378" title="Permalink to The Reference Wars" rel="bookmark">permalink</a>. Follow comments here with the <a href="http://magnusmanske.de/wordpress/archives/378/feed" title="Comments RSS to The Reference Wars" rel="alternate" type="application/rss+xml">RSS feed</a>. Both comments and trackbacks are currently closed. </div> </div><!-- .post --> <div class="comments"> <h3 class="comment-header" id="numcomments">6 Comments</h3> <ol id="comments" class="commentlist"> <li id="comment-5851" class="<br /> <b>Fatal error</b>: Uncaught ArgumentCountError: Too few arguments to function Activitypub\Comment::comment_class(), 1 passed in /home/www/wordpress/wp-includes/class-wp-hook.php on line 324 and exactly 3 expected in /home/www/wordpress/wp-content/plugins/activitypub/includes/class-comment.php:337 Stack trace: #0 /home/www/wordpress/wp-includes/class-wp-hook.php(324): Activitypub\Comment::comment_class(Array) #1 /home/www/wordpress/wp-includes/plugin.php(205): WP_Hook->apply_filters(Array, Array) #2 /home/www/wordpress/wp-content/themes/veryplaintxt/functions.php(128): apply_filters('comment_class', Array) #3 /home/www/wordpress/wp-content/themes/veryplaintxt/comments.php(32): veryplaintxt_comment_class() #4 /home/www/wordpress/wp-includes/comment-template.php(1629): require('/home/www/wordp...') #5 /home/www/wordpress/wp-content/themes/veryplaintxt/single.php(41): comments_template() #6 /home/www/wordpress/wp-includes/template-loader.php(106): include('/home/www/wordp...') #7 /home/www/wordpress/wp-blog-header.php(19): require_ in <b>/home/www/wordpress/wp-content/plugins/activitypub/includes/class-comment.php</b> on line <b>337</b><br />