CINXE.COM
Methodology for matching financial and patents databases - Corporate Invention Board
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-gb" lang="en-gb"> <head><script type="text/javascript" src="/_static/js/bundle-playback.js?v=HxkREWBo" charset="utf-8"></script> <script type="text/javascript" src="/_static/js/wombat.js?v=txqj7nKC" charset="utf-8"></script> <script>window.RufflePlayer=window.RufflePlayer||{};window.RufflePlayer.config={"autoplay":"on","unmuteOverlay":"hidden"};</script> <script type="text/javascript" src="/_static/js/ruffle/ruffle.js"></script> <script type="text/javascript"> __wm.init("https://web.archive.org/web"); __wm.wombat("http://www.corporateinventionboard.eu:80/en/methodology/methodology-for-matching-financial-and-patents-databases","20100328222851","https://web.archive.org/","web","/_static/", "1269815331"); </script> <link rel="stylesheet" type="text/css" href="/_static/css/banner-styles.css?v=S1zqJCYt" /> <link rel="stylesheet" type="text/css" href="/_static/css/iconochive.css?v=3PDvdIFv" /> <!-- End Wayback Rewrite JS Include --> <meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7"/> <base href="https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/methodology/methodology-for-matching-financial-and-patents-databases"/> <meta http-equiv="content-type" content="text/html; charset=utf-8"/> <meta name="robots" content="index, follow"/> <meta name="keywords" content="Patent data base, guo, groups, board, invention, matching, names, corporate, orbis, financial, priority, first, patent, subsidiaries, reviewed, filed, name, different methodology"/> <meta name="title" content="Methodology for matching financial and patents databases"/> <meta name="author" content="Administrator"/> <meta name="description" content="The priority patent portfolio of a given group is defined as the collection of the priority patents applied for by its “Global Ultimate Owner” (GUO) and by 
all its consolidated subsidiary companies - i.e those in which the GUO has a total participation higher or equal to 50,01%. The implementation of this rule requires matching the names of the GUO of the groups and the subsidiaries extracted from Orbis database. But it also requires matching those with the names of the assignees as listed in the Patstat database."/> <meta name="generator" content="Joomla! 1.5 - Open Source Content Management"/> <title>Methodology for matching financial and patents databases - Corporate Invention Board</title> <script type="text/javascript" src="/web/20100328222851js_/http://www.corporateinventionboard.eu/media/system/js/mootools.js"></script> <script type="text/javascript" src="/web/20100328222851js_/http://www.corporateinventionboard.eu/media/system/js/caption.js"></script> <link rel="stylesheet" href="/web/20100328222851cs_/http://www.corporateinventionboard.eu/templates/system/css/system.css" type="text/css"/> <link rel="stylesheet" href="/web/20100328222851cs_/http://www.corporateinventionboard.eu/templates/system/css/general.css" type="text/css"/> <link rel="stylesheet" type="text/css" href="/web/20100328222851cs_/http://www.corporateinventionboard.eu/templates/corpofinal5/css/template.css"/> <!--[if IE 6]><link rel="stylesheet" href="/templates/corpofinal5/css/template.ie6.css" type="text/css" media="screen" /><![endif]--> <!--[if IE 7]><link rel="stylesheet" href="/templates/corpofinal5/css/template.ie7.css" type="text/css" media="screen" /><![endif]--> <script type="text/javascript" src="/web/20100328222851js_/http://www.corporateinventionboard.eu/templates/corpofinal5/script.js"></script> </head> <body> <div id="art-page-background-simple-gradient"> </div> <div id="art-main"> <div class="art-Sheet"> <div class="art-Sheet-tl"></div> <div class="art-Sheet-tr"></div> <div class="art-Sheet-bl"></div> <div class="art-Sheet-br"></div> <div class="art-Sheet-tc"></div> <div class="art-Sheet-bc"></div> <div 
class="art-Sheet-cl"></div> <div class="art-Sheet-cr"></div> <div class="art-Sheet-cc"></div> <div class="art-Sheet-body"> <div class="art-Header"> <div class="art-Header-jpeg"></div> <div class="art-Logo"> <h1 id="name-text" class="art-Logo-name"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/">Corporate Invention Board</a></h1> <div id="slogan-text" class="art-Logo-text">For an in-depth analysis of global patent portfolios</div> <div id="ja-search"> <form action="index.php" method="post"> <div class="search"> <input name="searchword" id="mod_search_searchword" maxlength="20" alt="Search" class="inputbox" type="text" size="20" value="search..." onblur="if(this.value=='') this.value='search...';" onfocus="if(this.value=='search...') this.value='';"/> </div> <input type="hidden" name="task" value="search"/> <input type="hidden" name="option" value="com_search"/> <input type="hidden" name="Itemid" value="6"/> </form> </div> <div id="ja-lang"> <div id="jflanguageselection"><div class="rawimages"><span><a href="https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/fr/methodologie/methodologie-appariement"><img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/components/com_joomfish/images/flags/fr.gif" alt="French (Fr)" title="French (Fr)"/></a></span><span id="active_language"><a href="https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology/methodology-for-matching-financial-and-patents-databases"><img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/components/com_joomfish/images/flags/en.gif" alt="English (United Kingdom)" title="English (United Kingdom)"/></a></span><span><a href="https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/es/metodologia/metodologia-emparejamiento"><img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/components/com_joomfish/images/flags/es.gif" alt="Español(Spanish Formal 
International)" title="Español(Spanish Formal International)"/></a></span></div></div><!--Joom!fish V2.0.4 (Lightning)--> <!-- © 2003-2009 Think Network, released under the GPL. --> <!-- More information: at http://www.joomfish.net --> </div> </div> </div> <div class="art-nav"> <div class="l"></div> <div class="r"></div> <ul class="art-menu"><li class="item1"><a href="https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/"><span class="l"> </span><span class="r"> </span><span class="t">Home</span></a></li><li class="parent active item3"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology" class="active"><span class="l"> </span><span class="r"> </span><span class="t">Methodology</span></a><ul><li class="item4"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology/methodology-for-patents-analysis">for patents analysis</a></li><li class="item5"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology/methodology-for-identification-of-world-industrial-independent-groups">for identification groups</a></li><li id="current" class="active item6"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology/methodology-for-matching-financial-and-patents-databases" class="active">for matching databases</a></li></ul></li><li class="item7"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/data-sources"><span class="l"> </span><span class="r"> </span><span class="t">Data sources</span></a></li><li class="item8"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/corporate-rankings"><span class="l"> </span><span class="r"> </span><span class="t">Corporate Rankings</span></a></li><li class="parent item15"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/exploitation-of-the-cib-database"><span class="l"> </span><span class="r"> </span><span class="t">Exploitation of the CIB</span></a><ul><li 
class="item10"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/exploitation-of-the-cib-database/exploitation-of-the-cib-database">CIB database</a></li><li class="item16"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/exploitation-of-the-cib-database/current-research-projects">Research projects</a></li></ul></li><li class="item17"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/data-visualisation"><span class="l"> </span><span class="r"> </span><span class="t">Visualisation</span></a></li><li class="item11"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/news"><span class="l"> </span><span class="r"> </span><span class="t">News</span></a></li><li class="item9"><a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/the-project-team-and-partners"><span class="l"> </span><span class="r"> </span><span class="t">Project team</span></a></li></ul></div> <div class="art-contentLayout"> <div class="art-content-wide"> <div class="art-nostyle"> <span class="breadcrumbs pathway"> <a href="https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/" class="pathway">Home</a> <img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/images/M_images/arrow.png" alt=""/> <a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology" class="pathway">Methodology</a> <img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/images/M_images/arrow.png" alt=""/> for matching databases</span> </div> <div class="art-Post"> <div class="art-Post-tl"></div> <div class="art-Post-tr"></div> <div class="art-Post-bl"></div> <div class="art-Post-br"></div> <div class="art-Post-tc"></div> <div class="art-Post-bc"></div> <div class="art-Post-cl"></div> <div class="art-Post-cr"></div> <div class="art-Post-cc"></div> <div class="art-Post-body"> <div class="art-Post-inner"> <div class="art-PostMetadataHeader"> <h2 
class="art-PostHeader"> Methodology for matching financial and patents databases </h2> </div> <div class="art-PostHeaderIcons art-metadata-icons"> <a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology/methodology-for-matching-financial-and-patents-databases?format=phocapdf" title="PDF" onclick="window.open(this.href,'win2','status=no,toolbar=no,scrollbars=yes,titlebar=no,menubar=no,resizable=yes,width=640,height=480,directories=no,location=no'); return false;" rel="nofollow"><img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/templates/corpofinal5/images/pdf_button.png" alt="PDF"/></a> | <a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/methodology/methodology-for-matching-financial-and-patents-databases?tmpl=component&print=1&page=" title="Print" onclick="window.open(this.href,'win2','status=no,toolbar=no,scrollbars=yes,titlebar=no,menubar=no,resizable=yes,width=640,height=480,directories=no,location=no'); return false;" rel="nofollow"><img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/templates/corpofinal5/images/printButton.png" alt="Print"/></a> | <a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/component/mailto/?tmpl=component&link=aHR0cDovL3d3dy5jb3Jwb3JhdGVpbnZlbnRpb25ib2FyZC5ldS9lbi9tZXRob2RvbG9neS9tZXRob2RvbG9neS1mb3ItbWF0Y2hpbmctZmluYW5jaWFsLWFuZC1wYXRlbnRzLWRhdGFiYXNlcw%3D%3D" title="E-mail" onclick="window.open(this.href,'win2','width=400,height=350,menubar=yes,resizable=yes'); return false;"><img src="/web/20100328222851im_/http://www.corporateinventionboard.eu/templates/corpofinal5/images/emailButton.png" alt="E-mail"/></a> </div> <div class="art-PostContent"> <div class="art-article"><div>The priority patent portfolio of a given group is defined as the collection of the priority patents applied for by its “Global Ultimate Owner” (GUO) and by all its consolidated subsidiary companies - i.e those in which the GUO has a total participation higher 
or equal to 50,01%. The implementation of this rule requires matching the names of the GUO of the groups and the subsidiaries extracted from Orbis database. But it also requires matching those with the names of the assignees as listed in the Patstat database. This automated pairing required a strict match between the character strings of the two databases, which raised two difficulties.</div> <h4><strong>Difficulties related to the matching of databases</strong></h4> <div>First of all, a company can appear with a different name and spelling within the patent database, for example, IBM and International Business Machines. It is thus difficult to regroup under a single label patents applied for with different applicant labels. This difficulty is overcome by using the harmonized names suggested in the Patstat database (see <a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/data-sources" title="Sources of the analyzed data">Data sources</a>). We can thus gather the variations of an assignee name under a single one, known as the harmonized one.</div> <p> </p> <div>A second difficulty arises when the name of the assignee which appears in the patent database does not correspond exactly to its legal name (GUO or subsidiary company name) used in the Orbis database, which can also differ from the common designation of this entity. Thus the Dutch group known as “Philips” in the Patstat patent database is listed in Orbis as a GUO whose name is Koninklijke Philips Electronics NV.</div> <h4><strong>Preliminary standardization of character strings</strong></h4> <div>The matching technique which is used in the Corporate Invention Board project builds on Tom Magerman’s research at the Catholic University of Leuven. 
His <strong>methodology</strong>, developed in collaboration with the Organisation for Economic Co-operation and Development (OECD), the Eurostat directorate of the European Commission and the European Patent Office (EPO), can be summarized in two stages.</div> <div>It is first necessary to proceed to a spelling check and a cleaning stage and to remove, for example, double spaces appearing between two words or blank spaces preceding a comma.</div> <div>Then, legal designations of the companies (such as Ltd, Corp, Its, Inc…) that appear systematically in the Orbis database, but seldom in the patent one, need to be removed in order to improve the matching between the two.</div> <div>This methodology allows us to identify more than 5 million priority patents registered by the 2400 studied groups.</div> <div><strong> </strong></div> <h4><strong>A reducible but inevitable margin of error</strong></h4> <div>As for any treatment of large databases, it would be illusory to think of identifying without any mistake, the whole of the patents of the studied population of multinational corporations. The objective is to choose the most satisfactory trade-off between the false-positives and the false-negatives. In the first case, it would mean matching a patent to a company to which it does not belong; in the second case, it would mean missing a patent applied for by a firm which would lead to its non-integration in the company’s patent portfolio. Nevertheless, a margin of error always remains when using an automated process.</div> <p> </p> <div>Our goal is to limit the extent of error for this first edition and to work in order to improve it in future editions of the Corporate Invention Board. 
This will be possible with methodological improvements in names’ identification and standardization and thanks to improvements in the Patstat database (see <a href="/web/20100328222851/http://www.corporateinventionboard.eu/en/data-sources" title="Sources of the analyzed data"><span style="color: #810081;">Data sources</span></a>). This should improve the matching process.</div> <p> </p> <div>It is our intention to engage with other research groups who would like to work within the framework of this first edition of the Corporate Invention Board.</div> </div><span class="article_separator"> </span> </div> <div class="cleared"></div> </div> <div class="cleared"></div> </div> </div> </div> </div> <div class="cleared"></div> <div class="art-Footer"> <div class="art-Footer-inner"> <div class="art-Footer-text"> <p>© Corporate Invention Board 2009 - 2010.<br/> All Rights Reserved.</p> </div> </div> <div class="art-Footer-background"></div> </div> <div class="cleared"></div> </div> </div> <div class="cleared"></div> <p class="art-page-footer"></p> </div> <!-- google analytics --> <script type="text/javascript"> var gaJsHost = (("https:" == document.location.protocol) ? "https://web.archive.org/web/20100328222851/https://ssl." 
: "https://web.archive.org/web/20100328222851/http://www."); document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); </script> <script type="text/javascript"> try { var pageTracker = _gat._getTracker("UA-778526-4"); pageTracker._trackPageview(); } catch(err) {}</script> </body> <!-- phpmyvisites --> <a href="https://web.archive.org/web/20100328222851/http://www.phpmyvisites.net/" title="phpMyVisites | Open source web analytics" onclick="window.open(this.href);return(false);"><script type="text/javascript"> <!-- var a_vars = Array(); var pagename=''; var phpmyvisitesSite = 1; var phpmyvisitesURL = "https://web.archive.org/web/20100328222851/http://www.corporateinventionboard.eu/phpmv2/phpmyvisites.php"; //--> </script> <script language="javascript" src="https://web.archive.org/web/20100328222851js_/http://www.corporateinventionboard.eu/phpmv2/phpmyvisites.js" type="text/javascript"></script> <object><noscript><p>phpMyVisites | Open source web analytics <img src="https://web.archive.org/web/20100328222851im_/http://www.corporateinventionboard.eu/phpmv2/phpmyvisites.php" alt="Statistics" style="border:0"/> </p></noscript></object></a> <!-- /phpmyvisites --> </html><!-- FILE ARCHIVED ON 22:28:51 Mar 28, 2010 AND RETRIEVED FROM THE INTERNET ARCHIVE ON 23:37:31 Nov 23, 2024. JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE. ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C. SECTION 108(a)(3)). --> <!-- playback timings (ms): captures_list: 0.577 exclusion.robots: 0.03 exclusion.robots.policy: 0.019 esindex: 0.01 cdx.remote: 36.929 LoadShardBlock: 374.901 (3) PetaboxLoader3.resolve: 338.824 (4) PetaboxLoader3.datanode: 232.398 (4) load_resource: 248.591 -->