CINXE.COM

Wikidata - zavod

<!doctype html> <html lang="en" class="no-js"> <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width,initial-scale=1"> <link rel="prev" href="../peps/"> <link rel="icon" href="https://assets.opensanctions.org/images/favicon-32x32.png"> <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.5"> <title>Wikidata - zavod</title> <link rel="stylesheet" href="../assets/stylesheets/main.8608ea7d.min.css"> <link rel="stylesheet" href="../assets/stylesheets/palette.06af60db.min.css"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback"> <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style> <link rel="stylesheet" href="../assets/_mkdocstrings.css"> <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script> </head> <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="custom" data-md-color-accent="custom"> <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off"> <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off"> <label class="md-overlay" for="__drawer"></label> <div data-md-component="skip"> <a href="#wikidata" class="md-skip"> Skip to content </a> </div> <div data-md-component="announce"> </div> <header class="md-header md-header--shadow" data-md-component="header"> <nav class="md-header__inner md-grid" aria-label="Header"> <a href=".." 
title="zavod" class="md-header__button md-logo" aria-label="zavod" data-md-component="logo"> <img src="https://assets.opensanctions.org/images/ura/logo_white.png" alt="logo"> </a> <label class="md-header__button md-icon" for="__drawer"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg> </label> <div class="md-header__title" data-md-component="header-title"> <div class="md-header__ellipsis"> <div class="md-header__topic"> <span class="md-ellipsis"> zavod </span> </div> <div class="md-header__topic" data-md-component="header-topic"> <span class="md-ellipsis"> Wikidata </span> </div> </div> </div> <div class="md-header__source"> <a href="https://github.com/opensanctions/opensanctions" title="Go to repository" class="md-source" data-md-component="source"> <div class="md-source__icon md-icon"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> </div> <div class="md-source__repository"> GitHub </div> </a> </div> </nav> </header> <div class="md-container" data-md-component="container"> <main class="md-main" data-md-component="main"> <div class="md-main__inner md-grid"> <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" > <div class="md-sidebar__scrollwrap"> <div 
class="md-sidebar__inner"> <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0"> <label class="md-nav__title" for="__drawer"> <a href=".." title="zavod" class="md-nav__button md-logo" aria-label="zavod" data-md-component="logo"> <img src="https://assets.opensanctions.org/images/ura/logo_white.png" alt="logo"> </a> zavod </label> <div class="md-nav__source"> <a href="https://github.com/opensanctions/opensanctions" title="Go to repository" class="md-source" data-md-component="source"> <div class="md-source__icon md-icon"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> </div> <div class="md-source__repository"> GitHub </div> </a> </div> <ul class="md-nav__list" data-md-scrollfix> <li class="md-nav__item"> <a href=".." 
class="md-nav__link"> <span class="md-ellipsis"> Overview </span> </a> </li> <li class="md-nav__item"> <a href="../install/" class="md-nav__link"> <span class="md-ellipsis"> Installation </span> </a> </li> <li class="md-nav__item"> <a href="../tutorial/" class="md-nav__link"> <span class="md-ellipsis"> Tutorial </span> </a> </li> <li class="md-nav__item"> <a href="../usage/" class="md-nav__link"> <span class="md-ellipsis"> Command-line usage </span> </a> </li> <li class="md-nav__item"> <a href="../metadata/" class="md-nav__link"> <span class="md-ellipsis"> Dataset metadata </span> </a> </li> <li class="md-nav__item"> <a href="../context/" class="md-nav__link"> <span class="md-ellipsis"> Context </span> </a> </li> <li class="md-nav__item"> <a href="../helpers/" class="md-nav__link"> <span class="md-ellipsis"> Helpers </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" > <label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0"> <span class="md-ellipsis"> Best practices </span> <span class="md-nav__icon md-icon"></span> </label> <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false"> <label class="md-nav__title" for="__nav_8"> <span class="md-nav__icon md-icon"></span> Best practices </label> <ul class="md-nav__list" data-md-scrollfix> <li class="md-nav__item"> <a href="../best_practices/caching/" class="md-nav__link"> <span class="md-ellipsis"> Caching Considerations </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/patterns/" class="md-nav__link"> <span class="md-ellipsis"> Common Patterns </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/datapatch_lookups/" class="md-nav__link"> <span class="md-ellipsis"> Datapatch lookups </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/dates_meta/" class="md-nav__link"> <span class="md-ellipsis"> Date parsing with dataset 
metadata </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/http_operations/" class="md-nav__link"> <span class="md-ellipsis"> HTTP Operations </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/priorities/" class="md-nav__link"> <span class="md-ellipsis"> Data priorities </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/merge_checklist/" class="md-nav__link"> <span class="md-ellipsis"> Checklist when reviewing a crawler </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/xpath_and_html/" class="md-nav__link"> <span class="md-ellipsis"> XPath and HTML </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="../peps/" class="md-nav__link"> <span class="md-ellipsis"> PEPs </span> </a> </li> <li class="md-nav__item md-nav__item--active"> <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc"> <label class="md-nav__link md-nav__link--active" for="__toc"> <span class="md-ellipsis"> Wikidata </span> <span class="md-nav__icon md-icon"></span> </label> <a href="./" class="md-nav__link md-nav__link--active"> <span class="md-ellipsis"> Wikidata </span> </a> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> <label class="md-nav__title" for="__toc"> <span class="md-nav__icon md-icon"></span> Table of contents </label> <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> <li class="md-nav__item"> <a href="#publishing-to-wikidata-using-zavod" class="md-nav__link"> <span class="md-ellipsis"> Publishing to Wikidata using zavod </span> </a> <nav class="md-nav" aria-label="Publishing to Wikidata using zavod"> <ul class="md-nav__list"> <li class="md-nav__item"> <a href="#running-zavod-wd-up" class="md-nav__link"> <span class="md-ellipsis"> Running zavod wd-up </span> </a> </li> </ul> </nav> </li> </ul> </nav> </li> </ul> </nav> </div> </div> </div> <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" 
> <div class="md-sidebar__scrollwrap"> <div class="md-sidebar__inner"> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> <label class="md-nav__title" for="__toc"> <span class="md-nav__icon md-icon"></span> Table of contents </label> <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> <li class="md-nav__item"> <a href="#publishing-to-wikidata-using-zavod" class="md-nav__link"> <span class="md-ellipsis"> Publishing to Wikidata using zavod </span> </a> <nav class="md-nav" aria-label="Publishing to Wikidata using zavod"> <ul class="md-nav__list"> <li class="md-nav__item"> <a href="#running-zavod-wd-up" class="md-nav__link"> <span class="md-ellipsis"> Running zavod wd-up </span> </a> </li> </ul> </nav> </li> </ul> </nav> </div> </div> </div> <div class="md-content" data-md-component="content"> <article class="md-content__inner md-typeset"> <h1 id="wikidata">Wikidata</h1> <p>We import <a href="https://www.wikidata.org/wiki/Wikidata:Main_Page">Wikidata</a> to OpenSanctions in two ways: using a <a href="../tutorial/">crawler</a> which imports persons who have held any of a set of wikidata positions we have categorised as <a href="https://opensanctions.org/pep">Politically Exposed Person positions</a>, and using our <a href="https://www.opensanctions.org/datasets/wikidata/">Wikidata Enricher</a>.</p> <p>We also occasionally publish data for a small selection of properties to Wikidata. The current publishing process is interactive and completely supervised by a human.</p> <h2 id="publishing-to-wikidata-using-zavod">Publishing to Wikidata using zavod</h2> <p>The zavod command line tool can publish data from a specific dataset to Wikidata. 
The tool iterates over the entities in the specified dataset until it finds an entity for which it can perform some action:</p> <ol> <li>If an entity has a Wikidata QID, it proposes any edits it can make, awaiting user confirmation to publish.</li> <li>If an entity does not have a QID, it searches for existing Wikidata items to <a href="https://www.opensanctions.org/docs/identifiers/">resolve the entity to</a>, and proposes the Wikidata edits it would make if the user instead chooses to create a new Wikidata item.</li> </ol> <p>Resolving the entity to an existing Wikidata item repeats the check for potential edits. If no edits are proposed for the current entity, the next entity with possible actions is loaded.</p> <p>Publishing changes to Wikidata can take a number of seconds, since the Wikidata API imposes throttling to avoid overload. Once changes are published, the next entity with possible actions is loaded.</p> <h3 id="running-zavod-wd-up">Running zavod <code>wd-up</code></h3> <p>In addition to the basic zavod setup, set the following environment variables:</p> <div class="language-text highlight"><pre><span></span><code>ZAVOD_ARCHIVE_BUCKET=data.opensanctions.org
ZAVOD_ARCHIVE_BACKEND=GoogleCloudBackend
ZAVOD_WD_CONSUMER_TOKEN
ZAVOD_WD_CONSUMER_SECRET
ZAVOD_WD_ACCESS_TOKEN
ZAVOD_WD_ACCESS_SECRET
ZAVOD_WD_USER
PYWIKIBOT_DIR=.pywikibot
</code></pre></div> <p>Get OAuth credentials by registering an <a href="https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration/propose/oauth1a">OAuth 1.0a consumer</a> with permission to:</p> <ul> <li>edit existing pages</li> <li>create, edit and move pages</li> </ul> <p>Set <code>ZAVOD_WD_USER</code> to the username used for the OAuth consumer.</p> <p>Set <code>PYWIKIBOT_DIR</code> to the directory with your <code>user-config.py</code> - <code>.pywikibot</code> in this repository is probably sufficient.</p> <p>Copy an up-to-date resolve.ijson file to your <code>ZAVOD_RESOLVER_PATH</code>.</p> <p>Run 
<code>wd-up</code> as follows, adjusting it for the dataset and country you'd like to sync up:</p> <div class="language-text highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>zavod wd-up \
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a> --clear \
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a> datasets/de/abgeordnetenwatch/de_abgeordnetenwatch.yml \
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a> datasets/_analysis/ann_pep_positions/ann_pep_positions.yml \
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a> --country-adjective German \
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a> --country-code de
</span></code></pre></div> <ul> <li>The panel on the left shows the current OpenSanctions entity, and below that the proposed actions.</li> <li>The middle panel shows the search results for Wikidata items if the current entity does not have a QID. Highlight the right option using up/down arrows.</li> <li>The panel on the right shows a log of operations by <code>wd-up</code> and tells you what to do next.</li> <li>Press save after creating or resolving a Wikidata item and remember to copy your resolve.ijson back and upstream the changes.</li> </ul> <p>Country adjective is used to generate descriptions like <code>German politician</code>.</p> <p>Country code is used to sanity check that the position refers to the country of the supplied nationality adjective. It only works if you supply matching arguments - it isn't clever. 
Use lowercase like the data does.</p> <p><code>wd-up</code> has the following limitations (and probably many more):</p> <ul> <li>Collection datasets don't work very well because it does not properly generate sources for referencing.</li> <li>Only Person entities which are targets are considered.</li> <li>Supported properties: <ul> <li>labels of any language</li> <li>descriptions only in <code>en</code> (English)</li> <li>'instance of' Human</li> <li>'sex or gender'</li> <li>'birth date'</li> <li>'position held'</li> </ul> </li> <li>Only positions which have QIDs are considered. You might benefit from doing an <a href="https://www.opensanctions.org/docs/identifiers/">xref</a> between your dataset and wd_peps first.</li> <li>Edits are only proposed if sources can be provided, except for labels, descriptions, and 'instance of'.</li> <li>Sources are only proposed if a <code>sourceUrl</code> property is available for the entity with the same source dataset as the property being considered.</li> </ul> </article> </div> <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> </div> </main> <footer class="md-footer"> <div class="md-footer-meta md-typeset"> <div class="md-footer-meta__inner md-grid"> <div class="md-copyright"> Made with <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener"> Material for MkDocs </a> </div> <div class="md-social"> <a href="https://www.opensanctions.org/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M16.36 14c.08-.66.14-1.32.14-2s-.06-1.34-.14-2h3.38c.16.64.26 1.31.26 2s-.1 1.36-.26 2m-5.15 5.56c.6-1.11 1.06-2.31 1.38-3.56h2.95a8.03 8.03 0 0 1-4.33 3.56M14.34 14H9.66c-.1-.66-.16-1.32-.16-2s.06-1.35.16-2h4.68c.09.65.16 1.32.16 2s-.07 1.34-.16 2M12 19.96c-.83-1.2-1.5-2.53-1.91-3.96h3.82c-.41 1.43-1.08 2.76-1.91 3.96M8 
8H5.08A7.92 7.92 0 0 1 9.4 4.44C8.8 5.55 8.35 6.75 8 8m-2.92 8H8c.35 1.25.8 2.45 1.4 3.56A8 8 0 0 1 5.08 16m-.82-2C4.1 13.36 4 12.69 4 12s.1-1.36.26-2h3.38c-.08.66-.14 1.32-.14 2s.06 1.34.14 2M12 4.03c.83 1.2 1.5 2.54 1.91 3.97h-3.82c.41-1.43 1.08-2.77 1.91-3.97M18.92 8h-2.95a15.7 15.7 0 0 0-1.38-3.56c1.84.63 3.37 1.9 4.33 3.56M12 2C6.47 2 2 6.5 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10A10 10 0 0 0 12 2"/></svg> </a> <a href="https://www.opensanctions.org/contact/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 4H4c-1.11 0-2 .89-2 2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2m-3 13H7v-2h10m0-2H7v-2h10m3-2h-3V6h3"/></svg> </a> <a href="https://www.opensanctions.org/slack/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M6 15a2 2 0 0 1-2 2 2 2 0 0 1-2-2 2 2 0 0 1 2-2h2zm1 0a2 2 0 0 1 2-2 2 2 0 0 1 2 2v5a2 2 0 0 1-2 2 2 2 0 0 1-2-2zm2-8a2 2 0 0 1-2-2 2 2 0 0 1 2-2 2 2 0 0 1 2 2v2zm0 1a2 2 0 0 1 2 2 2 2 0 0 1-2 2H4a2 2 0 0 1-2-2 2 2 0 0 1 2-2zm8 2a2 2 0 0 1 2-2 2 2 0 0 1 2 2 2 2 0 0 1-2 2h-2zm-1 0a2 2 0 0 1-2 2 2 2 0 0 1-2-2V5a2 2 0 0 1 2-2 2 2 0 0 1 2 2zm-2 8a2 2 0 0 1 2 2 2 2 0 0 1-2 2 2 2 0 0 1-2-2v-2zm0-1a2 2 0 0 1-2-2 2 2 0 0 1 2-2h5a2 2 0 0 1 2 2 2 2 0 0 1-2 2z"/></svg> </a> <a href="https://github.com/opensanctions/opensanctions" target="_blank" rel="noopener" title="github.com" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 2A10 10 0 0 0 2 12c0 4.42 2.87 8.17 6.84 9.5.5.08.66-.23.66-.5v-1.69c-2.77.6-3.36-1.34-3.36-1.34-.46-1.16-1.11-1.47-1.11-1.47-.91-.62.07-.6.07-.6 1 .07 1.53 1.03 1.53 1.03.87 1.52 2.34 1.07 2.91.83.09-.65.35-1.09.63-1.34-2.22-.25-4.55-1.11-4.55-4.92 0-1.11.38-2 1.03-2.71-.1-.25-.45-1.29.1-2.64 0 0 .84-.27 2.75 1.02.79-.22 1.65-.33 2.5-.33s1.71.11 2.5.33c1.91-1.29 
2.75-1.02 2.75-1.02.55 1.35.2 2.39.1 2.64.65.71 1.03 1.6 1.03 2.71 0 3.82-2.34 4.66-4.57 4.91.36.31.69.92.69 1.85V21c0 .27.16.59.67.5C19.14 20.16 22 16.42 22 12A10 10 0 0 0 12 2"/></svg> </a> </div> </div> </div> </footer> </div> <div class="md-dialog" data-md-component="dialog"> <div class="md-dialog__inner md-typeset"></div> </div> <script id="__config" type="application/json">{"base": "..", "features": [], "search": "../assets/javascripts/workers/search.f8cc74c7.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script> <script src="../assets/javascripts/bundle.f1b6f286.min.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10