CINXE.COM

Tutorial - zavod

<!doctype html> <html lang="en" class="no-js"> <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width,initial-scale=1"> <link rel="prev" href="../install/"> <link rel="next" href="../usage/"> <link rel="icon" href="https://assets.opensanctions.org/images/favicon-32x32.png"> <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.5"> <title>Tutorial - zavod</title> <link rel="stylesheet" href="../assets/stylesheets/main.8608ea7d.min.css"> <link rel="stylesheet" href="../assets/stylesheets/palette.06af60db.min.css"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback"> <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style> <link rel="stylesheet" href="../assets/_mkdocstrings.css"> <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script> </head> <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="custom" data-md-color-accent="custom"> <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off"> <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off"> <label class="md-overlay" for="__drawer"></label> <div data-md-component="skip"> <a href="#developing-a-crawler" class="md-skip"> Skip to content </a> </div> <div data-md-component="announce"> </div> <header class="md-header md-header--shadow" data-md-component="header"> <nav class="md-header__inner md-grid" aria-label="Header"> <a href=".." 
title="zavod" class="md-header__button md-logo" aria-label="zavod" data-md-component="logo"> <img src="https://assets.opensanctions.org/images/ura/logo_white.png" alt="logo"> </a> <label class="md-header__button md-icon" for="__drawer"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg> </label> <div class="md-header__title" data-md-component="header-title"> <div class="md-header__ellipsis"> <div class="md-header__topic"> <span class="md-ellipsis"> zavod </span> </div> <div class="md-header__topic" data-md-component="header-topic"> <span class="md-ellipsis"> Tutorial </span> </div> </div> </div> <div class="md-header__source"> <a href="https://github.com/opensanctions/opensanctions" title="Go to repository" class="md-source" data-md-component="source"> <div class="md-source__icon md-icon"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> </div> <div class="md-source__repository"> GitHub </div> </a> </div> </nav> </header> <div class="md-container" data-md-component="container"> <main class="md-main" data-md-component="main"> <div class="md-main__inner md-grid"> <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" > <div class="md-sidebar__scrollwrap"> <div 
class="md-sidebar__inner"> <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0"> <label class="md-nav__title" for="__drawer"> <a href=".." title="zavod" class="md-nav__button md-logo" aria-label="zavod" data-md-component="logo"> <img src="https://assets.opensanctions.org/images/ura/logo_white.png" alt="logo"> </a> zavod </label> <div class="md-nav__source"> <a href="https://github.com/opensanctions/opensanctions" title="Go to repository" class="md-source" data-md-component="source"> <div class="md-source__icon md-icon"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> </div> <div class="md-source__repository"> GitHub </div> </a> </div> <ul class="md-nav__list" data-md-scrollfix> <li class="md-nav__item"> <a href=".." 
class="md-nav__link"> <span class="md-ellipsis"> Overview </span> </a> </li> <li class="md-nav__item"> <a href="../install/" class="md-nav__link"> <span class="md-ellipsis"> Installation </span> </a> </li> <li class="md-nav__item md-nav__item--active"> <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc"> <label class="md-nav__link md-nav__link--active" for="__toc"> <span class="md-ellipsis"> Tutorial </span> <span class="md-nav__icon md-icon"></span> </label> <a href="./" class="md-nav__link md-nav__link--active"> <span class="md-ellipsis"> Tutorial </span> </a> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> <label class="md-nav__title" for="__toc"> <span class="md-nav__icon md-icon"></span> Table of contents </label> <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> <li class="md-nav__item"> <a href="#data-source-metadata" class="md-nav__link"> <span class="md-ellipsis"> Data source metadata </span> </a> </li> <li class="md-nav__item"> <a href="#running-a-dataset-crawler" class="md-nav__link"> <span class="md-ellipsis"> Running a dataset crawler </span> </a> </li> <li class="md-nav__item"> <a href="#developing-a-crawler-script" class="md-nav__link"> <span class="md-ellipsis"> Developing a crawler script </span> </a> <nav class="md-nav" aria-label="Developing a crawler script"> <ul class="md-nav__list"> <li class="md-nav__item"> <a href="#fetching-and-storing-resources" class="md-nav__link"> <span class="md-ellipsis"> Fetching and storing resources </span> </a> </li> <li class="md-nav__item"> <a href="#creating-and-emitting-entities" class="md-nav__link"> <span class="md-ellipsis"> Creating and emitting entities </span> </a> </li> <li class="md-nav__item"> <a href="#verifying-your-output" class="md-nav__link"> <span class="md-ellipsis"> Verifying your output </span> </a> </li> <li class="md-nav__item"> <a href="#add-your-crawler-to-a-collection" class="md-nav__link"> <span class="md-ellipsis"> Add your crawler 
to a collection </span> </a> </li> <li class="md-nav__item"> <a href="#next-steps" class="md-nav__link"> <span class="md-ellipsis"> Next steps </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="#checklist" class="md-nav__link"> <span class="md-ellipsis"> Checklist </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="../usage/" class="md-nav__link"> <span class="md-ellipsis"> Command-line usage </span> </a> </li> <li class="md-nav__item"> <a href="../metadata/" class="md-nav__link"> <span class="md-ellipsis"> Dataset metadata </span> </a> </li> <li class="md-nav__item"> <a href="../context/" class="md-nav__link"> <span class="md-ellipsis"> Context </span> </a> </li> <li class="md-nav__item"> <a href="../helpers/" class="md-nav__link"> <span class="md-ellipsis"> Helpers </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" > <label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0"> <span class="md-ellipsis"> Best practices </span> <span class="md-nav__icon md-icon"></span> </label> <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false"> <label class="md-nav__title" for="__nav_8"> <span class="md-nav__icon md-icon"></span> Best practices </label> <ul class="md-nav__list" data-md-scrollfix> <li class="md-nav__item"> <a href="../best_practices/caching/" class="md-nav__link"> <span class="md-ellipsis"> Caching Considerations </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/patterns/" class="md-nav__link"> <span class="md-ellipsis"> Common Patterns </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/datapatch_lookups/" class="md-nav__link"> <span class="md-ellipsis"> Datapatch lookups </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/dates_meta/" class="md-nav__link"> <span class="md-ellipsis"> Date parsing with dataset metadata 
</span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/http_operations/" class="md-nav__link"> <span class="md-ellipsis"> HTTP Operations </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/priorities/" class="md-nav__link"> <span class="md-ellipsis"> Data priorities </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/merge_checklist/" class="md-nav__link"> <span class="md-ellipsis"> Checklist when reviewing a crawler </span> </a> </li> <li class="md-nav__item"> <a href="../best_practices/xpath_and_html/" class="md-nav__link"> <span class="md-ellipsis"> XPath and HTML </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="../peps/" class="md-nav__link"> <span class="md-ellipsis"> PEPs </span> </a> </li> <li class="md-nav__item"> <a href="../wikidata/" class="md-nav__link"> <span class="md-ellipsis"> Wikidata </span> </a> </li> </ul> </nav> </div> </div> </div> <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" > <div class="md-sidebar__scrollwrap"> <div class="md-sidebar__inner"> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> <label class="md-nav__title" for="__toc"> <span class="md-nav__icon md-icon"></span> Table of contents </label> <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> <li class="md-nav__item"> <a href="#data-source-metadata" class="md-nav__link"> <span class="md-ellipsis"> Data source metadata </span> </a> </li> <li class="md-nav__item"> <a href="#running-a-dataset-crawler" class="md-nav__link"> <span class="md-ellipsis"> Running a dataset crawler </span> </a> </li> <li class="md-nav__item"> <a href="#developing-a-crawler-script" class="md-nav__link"> <span class="md-ellipsis"> Developing a crawler script </span> </a> <nav class="md-nav" aria-label="Developing a crawler script"> <ul class="md-nav__list"> <li class="md-nav__item"> <a href="#fetching-and-storing-resources" class="md-nav__link"> 
<span class="md-ellipsis"> Fetching and storing resources </span> </a> </li> <li class="md-nav__item"> <a href="#creating-and-emitting-entities" class="md-nav__link"> <span class="md-ellipsis"> Creating and emitting entities </span> </a> </li> <li class="md-nav__item"> <a href="#verifying-your-output" class="md-nav__link"> <span class="md-ellipsis"> Verifying your output </span> </a> </li> <li class="md-nav__item"> <a href="#add-your-crawler-to-a-collection" class="md-nav__link"> <span class="md-ellipsis"> Add your crawler to a collection </span> </a> </li> <li class="md-nav__item"> <a href="#next-steps" class="md-nav__link"> <span class="md-ellipsis"> Next steps </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="#checklist" class="md-nav__link"> <span class="md-ellipsis"> Checklist </span> </a> </li> </ul> </nav> </div> </div> </div> <div class="md-content" data-md-component="content"> <article class="md-content__inner md-typeset"> <h1 id="developing-a-crawler">Developing a crawler</h1> <p>A crawler is a small Python script that will import data from a web origin and store it as entities as a data source. <code>zavod</code> defines a framework for crawlers to retrieve data, parse it and emit structured data about people or companies into a database.</p> <div class="admonition info"> <p class="admonition-title">Please note</p> <p>Before you contribute a crawler, please consider if you are willing to remain involved in its maintenance after having the code included in <code>zavod</code>. Maintaining a crawler is an ongoing commitment, and the OpenSanctions team does not automatically assume that responsibility for code contributed by others. 
See our general <a href="https://www.opensanctions.org/docs/criteria/">inclusion criteria</a>.</p> </div> <ol> <li>Make sure you have <a href="../install/">installed zavod</a> and <a href="../install/#configuration">set the required environment variables</a>, specifically <code>ZAVOD_RESOLVER_PATH</code> and <code>ZAVOD_SYNC_POSITIONS</code>.</li> <li>File a <a href="https://github.com/opensanctions/opensanctions/issues">GitHub issue</a> to discuss the suggested source.</li> <li>Create a <a href="#data-source-metadata">YAML metadata</a> description for the new source.</li> <li>Create a <a href="#developing-a-crawler-script">Python script</a> to fetch and process the data.</li> <li>Address any data normalisation issues the framework might report.</li> </ol> <h2 id="data-source-metadata">Data source metadata</h2> <p>Before programming a crawler script, you need to create a YAML file with some basic metadata to describe the new dataset. That information includes the dataset name (which is normally derived from the YAML file name), information about the source publisher and the source data URL.</p> <p>The metadata file must also include a reference to the entry point, the Python code that should be executed in order to crawl the source.</p> <p>Create a new YAML file at the path <code>datasets/cc/source/cc_source.yml</code> replacing <code>cc</code> with the relevant ISO 3166-1 alpha-2 country code, and <code>source</code> with an acronym or short name for the source, separating name parts using underscores. Other codes may be derived from standard acronyms instead of country codes for regions that span beyond one country.</p> <div class="admonition warning"> <p class="admonition-title">Important</p> <p>Metadata is essential to making our data useable. 
We will not merge additional crawlers which don't have metadata, or where the descriptions are cryptic.</p> <p>Read your metadata as if it's your first time, and ask yourself if other readers will understand the scope and limitations of the dataset. For simple crawlers, writing the metadata can take as much time as writing the code.</p> </div> <p>The contents of the new metadata file should look something like this. This is a brief example. See the full <a href="../metadata/">metadata documentation</a> for all the required fields:</p> <div class="language-yaml highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">eu_fsf_demo</span> </span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="nt">title</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;Financial</span><span class="nv"> </span><span class="s">Sanctions</span><span class="nv"> </span><span class="s">Files</span><span class="nv"> </span><span class="s">(FSF)&quot;</span> </span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://eeas.europa.eu/</span> </span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="nt">load_db_uri</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">${OPENSANCTIONS_DATABASE_URI}</span> </span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="nt">coverage</span><span class="p">:</span> </span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" 
href="#__codelineno-0-6"></a><span class="w"> </span><span class="nt">frequency</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">daily</span> </span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a><span class="w"> </span><span class="nt">start</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">2024-03-19</span> </span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> </span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="c1"># The description should be extensive, and can use markdown for formatting:</span> </span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">&gt;</span> </span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a><span class="w"> </span><span class="no">As part of the Common Foreign Security Policy the European Union publishes</span> </span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a><span class="w"> </span><span class="no">a sanctions list that is implemented by all member states.</span> </span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a> </span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a><span class="c1"># The Python module in the same directory that contains the crawler code:</span> </span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a><span class="nt">entry_point</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar 
l-Scalar-Plain">crawler.py</span> </span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a> </span><span id="__span-0-17"><a id="__codelineno-0-17" name="__codelineno-0-17" href="#__codelineno-0-17"></a><span class="c1"># A prefix will be used to mint entity IDs. Keep it short.</span> </span><span id="__span-0-18"><a id="__codelineno-0-18" name="__codelineno-0-18" href="#__codelineno-0-18"></a><span class="nt">prefix</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">eu-fsf</span> </span><span id="__span-0-19"><a id="__codelineno-0-19" name="__codelineno-0-19" href="#__codelineno-0-19"></a> </span><span id="__span-0-20"><a id="__codelineno-0-20" name="__codelineno-0-20" href="#__codelineno-0-20"></a><span class="c1"># This section provides information about the original publisher of the data,</span> </span><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21" href="#__codelineno-0-21"></a><span class="c1"># often a government authority:</span> </span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22" href="#__codelineno-0-22"></a><span class="nt">publisher</span><span class="p">:</span> </span><span id="__span-0-23"><a id="__codelineno-0-23" name="__codelineno-0-23" href="#__codelineno-0-23"></a><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">European Union External Action Service</span> </span><span id="__span-0-24"><a id="__codelineno-0-24" name="__codelineno-0-24" href="#__codelineno-0-24"></a><span class="w"> </span><span class="nt">acronym</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">EEAS</span> </span><span id="__span-0-25"><a id="__codelineno-0-25" name="__codelineno-0-25" href="#__codelineno-0-25"></a><span class="w"> </span><span class="nt">official</span><span 
class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span> </span><span id="__span-0-26"><a id="__codelineno-0-26" name="__codelineno-0-26" href="#__codelineno-0-26"></a><span class="w"> </span><span class="nt">description</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">The EEAS is the EU&#39;s diplomatic service, and carries out the EU&#39;s foreign and security policy.</span><span class="w"> </span> </span><span id="__span-0-27"><a id="__codelineno-0-27" name="__codelineno-0-27" href="#__codelineno-0-27"></a><span class="w"> </span><span class="nt">country</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">eu</span> </span><span id="__span-0-28"><a id="__codelineno-0-28" name="__codelineno-0-28" href="#__codelineno-0-28"></a><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://eeas.europa.eu/topics/sanctions-policy/8442/consolidated-list-of-sanctions_en</span> </span><span id="__span-0-29"><a id="__codelineno-0-29" name="__codelineno-0-29" href="#__codelineno-0-29"></a> </span><span id="__span-0-30"><a id="__codelineno-0-30" name="__codelineno-0-30" href="#__codelineno-0-30"></a><span class="c1"># Information about the data, including a deep link to a downloadable file, if</span> </span><span id="__span-0-31"><a id="__codelineno-0-31" name="__codelineno-0-31" href="#__codelineno-0-31"></a><span class="c1"># one exists.</span> </span><span id="__span-0-32"><a id="__codelineno-0-32" name="__codelineno-0-32" href="#__codelineno-0-32"></a><span class="nt">data</span><span class="p">:</span> </span><span id="__span-0-33"><a id="__codelineno-0-33" name="__codelineno-0-33" href="#__codelineno-0-33"></a><span class="w"> </span><span class="nt">url</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar 
l-Scalar-Plain">https://webgate.ec.europa.eu/europeaid/fsd/fsf/public/files/xmlFullSanctionsList_1_1/content</span> </span><span id="__span-0-34"><a id="__codelineno-0-34" name="__codelineno-0-34" href="#__codelineno-0-34"></a><span class="w"> </span><span class="nt">format</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">XML</span> </span></code></pre></div> <h2 id="running-a-dataset-crawler">Running a dataset crawler</h2> <p>Once that YAML file is stored in the correct folder, you should be able to run command-line operations against the dataset. For example (if your metadata file is named <code>eu_fsf_demo.yml</code>):</p> <div class="language-bash highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>$<span class="w"> </span>zavod<span class="w"> </span>crawl<span class="w"> </span>datasets/eu/fsf/eu_fsf_demo.yml </span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>.... 
</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="m">2023</span>-08-01<span class="w"> </span><span class="m">12</span>:36:24<span class="w"> </span><span class="o">[</span>warning<span class="w"> </span><span class="o">]</span><span class="w"> </span>No<span class="w"> </span>backfill<span class="w"> </span>bucket<span class="w"> </span>configured<span class="w"> </span><span class="o">[</span>zavod.archive<span class="o">]</span><span class="w"> </span> </span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="m">2023</span>-08-01<span class="w"> </span><span class="m">12</span>:36:24<span class="w"> </span><span class="o">[</span>info<span class="w"> </span><span class="o">]</span><span class="w"> </span>Running<span class="w"> </span>dataset<span class="w"> </span><span class="o">[</span>eu_fsf_demo<span class="o">]</span><span class="w"> </span><span class="nv">dataset</span><span class="o">=</span>eu_fsf_demo<span class="w"> </span><span class="nv">path</span><span class="o">=</span>/home/you/opensanctions/data/datasets/eu_fsf_demo </span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a><span class="m">2023</span>-08-01<span class="w"> </span><span class="m">12</span>:36:24<span class="w"> </span><span class="o">[</span>error<span class="w"> </span><span class="o">]</span><span class="w"> </span>Runner<span class="w"> </span>failed:<span class="w"> </span>Could<span class="w"> </span>not<span class="w"> </span>load<span class="w"> </span>entry<span class="w"> </span>point:<span class="w"> </span>crawler<span class="w"> </span><span class="o">[</span>eu_fsf_demo<span class="o">]</span><span class="w"> </span><span class="nv">dataset</span><span class="o">=</span>eu_fsf_demo </span></code></pre></div> <p>Don't worry about the backfill bucket warning - that is not needed when 
developing crawlers. It is used in production to automatically track when data was previously seen and updated.</p> <p>The <code>Runner failed: Could not load entry point: crawler</code> error indicates that it looked for our crawler and couldn't find it. Adding the crawler script is the next step.</p> <div class="admonition info"> <p class="admonition-title">Dry run mode</p> <p>You can switch zavod to dry run during crawler development by adding the <code>-d</code> (or <code>--dry-run</code>) flag on the command line. A dry run will not store any of the emitted data, and will disable the generation of correct timestamps, which is slow.</p> <p><code>zavod crawl -d datasets/eu/fsf/eu_fsf_demo.yml</code></p> </div> <h2 id="developing-a-crawler-script">Developing a crawler script</h2> <p>In order to actually feed data into the data source, we need to write a crawler script. The script location is specified in the YAML metadata file as <code>entry_point:</code>. This also means you could reference the same script for multiple data sources, for example in a scenario where two data sources use the same API, except with some varied parameters.</p> <p>In our example above, we'd create a file in <code>datasets/eu/fsf/crawler.py</code> with a crawler skeleton:</p> <div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">zavod</span><span class="w"> </span><span class="kn">import</span> <span class="n">Context</span> </span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a> </span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="k">def</span><span class="w"> </span><span class="nf">crawl</span><span class="p">(</span><span class="n">context</span><span class="p">:</span> <span 
class="n">Context</span><span class="p">):</span> </span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a> <span class="n">context</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Hello, World!&quot;</span><span class="p">)</span> </span></code></pre></div> <p>Running the crawler (<code>zavod crawl datasets/eu/fsf/eu_fsf_demo.yml</code>) should now produce a log line with the message <em>Hello, World!</em></p> <p>You'll notice that the <code>crawl()</code> function receives a <a class="autorefs autorefs-internal" href="../context/#zavod.context.Context"><code>Context</code></a> object. Think of it as a sort of sidekick: it helps you to create, store and document data in your crawler.</p> <h3 id="fetching-and-storing-resources">Fetching and storing resources</h3> <p>Many crawlers will start off by downloading a source data file, like a CSV table or an XML document. The <a class="autorefs autorefs-internal" href="../context/#zavod.context.Context"><code>context</code></a> provides utility methods that let you fetch a file and store it into the crawler's working directory. 
Files stored to the crawler home directory and <a class="autorefs autorefs-internal" href="../context/#zavod.context.Context.export_resource">exported as resources</a> will later be uploaded and published to the web.</p> <div class="language-python highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="k">def</span><span class="w"> </span><span class="nf">crawl</span><span class="p">(</span><span class="n">context</span><span class="p">):</span> </span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a> <span class="c1"># Fetch the source data URL specified in the metadata to a local path:</span> </span><span id="__span-3-3"><a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a> <span class="n">source_path</span> <span class="o">=</span> <span class="n">context</span><span class="o">.</span><span class="n">fetch_resource</span><span class="p">(</span><span class="s1">&#39;source.xml&#39;</span><span class="p">,</span> <span class="n">context</span><span class="o">.</span><span class="n">dataset</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">url</span><span class="p">)</span> </span><span id="__span-3-4"><a id="__codelineno-3-4" name="__codelineno-3-4" href="#__codelineno-3-4"></a> <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">source_path</span><span class="p">,</span> <span class="s1">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">fh</span><span class="p">:</span> </span><span id="__span-3-5"><a id="__codelineno-3-5" name="__codelineno-3-5" href="#__codelineno-3-5"></a> <span class="nb">print</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">fh</span><span class="o">.</span><span class="n">read</span><span 
class="p">()))</span> </span><span id="__span-3-6"><a id="__codelineno-3-6" name="__codelineno-3-6" href="#__codelineno-3-6"></a> </span><span id="__span-3-7"><a id="__codelineno-3-7" name="__codelineno-3-7" href="#__codelineno-3-7"></a> <span class="c1"># You can also register the file as a resource with the dataset that</span> </span><span id="__span-3-8"><a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a> <span class="c1"># will be included in the exported metadata index:</span> </span><span id="__span-3-9"><a id="__codelineno-3-9" name="__codelineno-3-9" href="#__codelineno-3-9"></a> <span class="n">context</span><span class="o">.</span><span class="n">export_resource</span><span class="p">(</span><span class="n">source_path</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="s2">&quot;Source data XML file&quot;</span><span class="p">)</span> </span></code></pre></div> <p>Other crawlers might not be as lucky: instead of fetching their source data as a single bulk file, they might need to crawl a large number of web pages to collect the necessary data. 
For this, access to a pre-configured Python <code>requests</code> session object is provided:</p> <div class="language-python highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">lxml</span><span class="w"> </span><span class="kn">import</span> <span class="n">html</span> </span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a> </span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="k">def</span><span class="w"> </span><span class="nf">crawl</span><span class="p">(</span><span class="n">context</span><span class="p">):</span> </span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a> <span class="n">response</span> <span class="o">=</span> <span class="n">context</span><span class="o">.</span><span class="n">http</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">context</span><span class="o">.</span><span class="n">dataset</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">url</span><span class="p">)</span> </span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a> </span><span id="__span-4-6"><a id="__codelineno-4-6" name="__codelineno-4-6" href="#__codelineno-4-6"></a> <span class="c1"># Parse the HTTP response into an lxml DOM:</span> </span><span id="__span-4-7"><a id="__codelineno-4-7" name="__codelineno-4-7" href="#__codelineno-4-7"></a> <span class="n">doc</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> </span><span 
id="__span-4-8"><a id="__codelineno-4-8" name="__codelineno-4-8" href="#__codelineno-4-8"></a> </span><span id="__span-4-9"><a id="__codelineno-4-9" name="__codelineno-4-9" href="#__codelineno-4-9"></a> <span class="c1"># Query the DOM for specific elements to extract data from:</span> </span><span id="__span-4-10"><a id="__codelineno-4-10" name="__codelineno-4-10" href="#__codelineno-4-10"></a> <span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">doc</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="s1">&#39;.//div[@class=&quot;person&quot;]&#39;</span><span class="p">):</span> </span><span id="__span-4-11"><a id="__codelineno-4-11" name="__codelineno-4-11" href="#__codelineno-4-11"></a> <span class="n">context</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Element&quot;</span><span class="p">,</span> <span class="n">element</span><span class="o">=</span><span class="n">element</span><span class="p">)</span> </span></code></pre></div> <p>Responses from the <code>context.http</code> session can also be cached using built-in helper methods:</p> <div class="language-python highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">lxml</span><span class="w"> </span><span class="kn">import</span> <span class="n">html</span> </span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a> </span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="k">def</span><span class="w"> </span><span class="nf">crawl</span><span class="p">(</span><span class="n">context</span><span class="p">):</span> </span><span id="__span-5-4"><a 
id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a> <span class="c1"># Fetch, cache and parse the HTTP response into an lxml DOM:</span> </span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a> <span class="n">doc</span> <span class="o">=</span> <span class="n">context</span><span class="o">.</span><span class="n">fetch_html</span><span class="p">(</span><span class="n">context</span><span class="o">.</span><span class="n">dataset</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="n">cache_days</span><span class="o">=</span><span class="mi">7</span><span class="p">)</span> </span><span id="__span-5-6"><a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a> </span><span id="__span-5-7"><a id="__codelineno-5-7" name="__codelineno-5-7" href="#__codelineno-5-7"></a> <span class="c1"># Query the DOM for specific elements to extract data from:</span> </span><span id="__span-5-8"><a id="__codelineno-5-8" name="__codelineno-5-8" href="#__codelineno-5-8"></a> <span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">doc</span><span class="o">.</span><span class="n">findall</span><span class="p">(</span><span class="s1">&#39;.//div[@class=&quot;person&quot;]&#39;</span><span class="p">):</span> </span><span id="__span-5-9"><a id="__codelineno-5-9" name="__codelineno-5-9" href="#__codelineno-5-9"></a> <span class="n">context</span><span class="o">.</span><span class="n">log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Element&quot;</span><span class="p">,</span> <span class="n">element</span><span class="o">=</span><span class="n">element</span><span class="p">)</span> </span></code></pre></div> <h3 id="creating-and-emitting-entities">Creating and emitting entities</h3> <p>The goal of 
each crawler is to produce data about persons and other entities of interest. To enable this, the <a class="autorefs autorefs-internal" href="../context/#zavod.context.Context"><code>context</code></a> provides a number of helpers that construct and store <a href="https://www.opensanctions.org/docs/entities/">entities</a>:</p> <div class="language-python highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="k">def</span><span class="w"> </span><span class="nf">crawl</span><span class="p">(</span><span class="n">context</span><span class="p">):</span> </span><span id="__span-6-2"><a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a> </span><span id="__span-6-3"><a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a> <span class="c1"># Create an entity object to which other information can be assigned: </span> </span><span id="__span-6-4"><a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a> <span class="n">entity</span> <span class="o">=</span> <span class="n">context</span><span class="o">.</span><span class="n">make</span><span class="p">(</span><span class="s2">&quot;Person&quot;</span><span class="p">)</span> </span><span id="__span-6-5"><a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a> </span><span id="__span-6-6"><a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a> <span class="c1"># Each entity needs an ID which is unique within the source database, and</span> </span><span id="__span-6-7"><a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a> <span class="c1"># ideally consistent over time.</span> </span><span id="__span-6-8"><a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a> <span class="c1"># This is often ideally derived from its ID in the source database,</span> </span><span 
id="__span-6-9"><a id="__codelineno-6-9" name="__codelineno-6-9" href="#__codelineno-6-9"></a> <span class="c1"># or a string with the above properties. See Patterns below.</span> </span><span id="__span-6-10"><a id="__codelineno-6-10" name="__codelineno-6-10" href="#__codelineno-6-10"></a> <span class="n">entity</span><span class="o">.</span><span class="n">id</span> <span class="o">=</span> <span class="n">context</span><span class="o">.</span><span class="n">make_id</span><span class="p">(</span><span class="s1">&#39;Joseph Biden&#39;</span><span class="p">)</span> </span><span id="__span-6-11"><a id="__codelineno-6-11" name="__codelineno-6-11" href="#__codelineno-6-11"></a> </span><span id="__span-6-12"><a id="__codelineno-6-12" name="__codelineno-6-12" href="#__codelineno-6-12"></a> <span class="c1"># Assign some property values:</span> </span><span id="__span-6-13"><a id="__codelineno-6-13" name="__codelineno-6-13" href="#__codelineno-6-13"></a> <span class="n">entity</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;Joseph Robinette Biden Jr.&#39;</span><span class="p">)</span> </span><span id="__span-6-14"><a id="__codelineno-6-14" name="__codelineno-6-14" href="#__codelineno-6-14"></a> <span class="n">entity</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s1">&#39;alias&#39;</span><span class="p">,</span> <span class="s1">&#39;Joe Biden&#39;</span><span class="p">)</span> </span><span id="__span-6-15"><a id="__codelineno-6-15" name="__codelineno-6-15" href="#__codelineno-6-15"></a> <span class="n">entity</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s1">&#39;birthDate&#39;</span><span class="p">,</span> <span class="s1">&#39;1942-11-20&#39;</span><span class="p">)</span> </span><span id="__span-6-16"><a id="__codelineno-6-16" name="__codelineno-6-16" 
href="#__codelineno-6-16"></a> </span><span id="__span-6-17"><a id="__codelineno-6-17" name="__codelineno-6-17" href="#__codelineno-6-17"></a> <span class="c1"># Invalid property values (&#39;never&#39; is not a date) will produce a log</span> </span><span id="__span-6-18"><a id="__codelineno-6-18" name="__codelineno-6-18" href="#__codelineno-6-18"></a> <span class="c1"># error:</span> </span><span id="__span-6-19"><a id="__codelineno-6-19" name="__codelineno-6-19" href="#__codelineno-6-19"></a> <span class="n">entity</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s1">&#39;deathDate&#39;</span><span class="p">,</span> <span class="s1">&#39;never&#39;</span><span class="p">)</span> </span><span id="__span-6-20"><a id="__codelineno-6-20" name="__codelineno-6-20" href="#__codelineno-6-20"></a> </span><span id="__span-6-21"><a id="__codelineno-6-21" name="__codelineno-6-21" href="#__codelineno-6-21"></a> <span class="c1"># Store or update the entity in the database:</span> </span><span id="__span-6-22"><a id="__codelineno-6-22" name="__codelineno-6-22" href="#__codelineno-6-22"></a> <span class="n">context</span><span class="o">.</span><span class="n">emit</span><span class="p">(</span><span class="n">entity</span><span class="p">)</span> </span></code></pre></div> <p>The <a class="autorefs autorefs-internal" href="../context/#zavod.entity.Entity">entity object</a> is based on the <a href="https://followthemoney.tech/reference/python/followthemoney/proxy.html#EntityProxy">entity proxy in FollowTheMoney</a>, so we suggest you also check out the <a href="https://followthemoney.tech/docs/api/">FtM documentation</a> on entity construction. 
Some additional utility methods are added in the <a class="autorefs autorefs-internal" href="../context/#zavod.entity.Entity"><code>Entity</code></a> class in <code>zavod</code>.</p> <h3 id="verifying-your-output">Verifying your output</h3> <p>Now that you're extracting data, it's a good idea to start verifying your output. Start by exporting your crawler's data:</p> <div class="language-bash highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>zavod<span class="w"> </span><span class="nb">export</span><span class="w"> </span>datasets/eu/fsf/eu_fsf_demo.yml </span></code></pre></div> <p>This will log a number of different file types that are exported by default. A nice way to explore the output is using the JSON command line utility <code>jq</code> and your favourite text pager like <code>less</code> together to browse and search within the <a href="https://www.opensanctions.org/docs/bulk/json/#:~:text=targets.nested.json"><code>targets.nested.json</code></a> and <code>statistics.json</code> outputs using a command like</p> <div class="language-bash highlight"><pre><span></span><code><span id="__span-8-1"><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>jq<span class="w"> </span>.<span class="w"> </span>data/datasets/eu_fsf_demo/targets.nested.json<span class="w"> </span>--color-output<span class="w"> </span><span class="p">|</span><span class="w"> </span>less<span class="w"> </span>-R </span></code></pre></div> <p>Good things to check are</p> <ul> <li>The number of entities produced of each type is as expected for your dataset</li> <li>Spot checking some specific persons, companies, and relations between them, as relevant to your data</li> <li>Any warnings in the crawler output</li> </ul> <h3 id="add-your-crawler-to-a-collection">Add your crawler to a collection</h3> <p>Our data is mostly used within a broader collection of datasets, and less often by 
accessing a specific dataset directly.</p> <p>Add your crawler to the most appropriate <a href="https://github.com/opensanctions/opensanctions/tree/main/datasets/_collections">collection</a> based on the <a href="https://www.opensanctions.org/docs/topics/">kind of entities</a> it is adding. Look for <a href="https://www.opensanctions.org/datasets/">similar datasets</a> and see which collection they are directly included in.</p> <p>Broader collections include more specific collections and/or specific crawlers.</p> <h3 id="next-steps">Next steps</h3> <p>You may now want to level up your crawler by looking at:</p> <ul> <li><a href="../helpers/">helpers</a> for common tasks,</li> <li><a href="../best_practices/patterns/">common patterns</a> for building crawlers,</li> <li>the <a href="../peps/">PEPs guide</a> if you're crawling Politically Exposed Persons and their Relatives and Close Associates, and</li> <li>the <a href="../best_practices/merge_checklist/">merge checklist</a> we'll follow before merging a new crawler.</li> </ul> <h2 id="checklist">Checklist</h2> <p>When contributing a new data source, or some other change, make sure of the following:</p> <ul> <li>You've created a metadata YAML file with detailed descriptions and links to the source URL.</li> <li>Your code should run after doing a simple <code>pip install</code> of the codebase. Include additional dependencies in the <code>setup.py</code>. Don't use non-Python dependencies like <code>Headless Chrome</code> or <code>Selenium</code>.</li> <li>The output data for your crawler should be Follow The Money objects. If you need more fields added to the ontology, submit a pull request upstream. Don't include left-over data in an improvised way.</li> <li>Include verbose logging in your crawler. Make sure that new fields or enum values introduced upstream (e.g. a new country code or sanction program) will cause a warning to be emitted. 
<a href="https://www.opensanctions.org/issues">Warnings</a> are checked regularly to identify when a crawler needs attention. Info and lower level logs are useful for debugging with the <code>-v</code> flag.</li> <li>Make sure your Python code is linted and formatted with <code>black</code>.</li> <li>Make sure your yaml is linted with yamllint.</li> </ul> </article> </div> <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> </div> </main> <footer class="md-footer"> <div class="md-footer-meta md-typeset"> <div class="md-footer-meta__inner md-grid"> <div class="md-copyright"> Made with <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener"> Material for MkDocs </a> </div> <div class="md-social"> <a href="https://www.opensanctions.org/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M16.36 14c.08-.66.14-1.32.14-2s-.06-1.34-.14-2h3.38c.16.64.26 1.31.26 2s-.1 1.36-.26 2m-5.15 5.56c.6-1.11 1.06-2.31 1.38-3.56h2.95a8.03 8.03 0 0 1-4.33 3.56M14.34 14H9.66c-.1-.66-.16-1.32-.16-2s.06-1.35.16-2h4.68c.09.65.16 1.32.16 2s-.07 1.34-.16 2M12 19.96c-.83-1.2-1.5-2.53-1.91-3.96h3.82c-.41 1.43-1.08 2.76-1.91 3.96M8 8H5.08A7.92 7.92 0 0 1 9.4 4.44C8.8 5.55 8.35 6.75 8 8m-2.92 8H8c.35 1.25.8 2.45 1.4 3.56A8 8 0 0 1 5.08 16m-.82-2C4.1 13.36 4 12.69 4 12s.1-1.36.26-2h3.38c-.08.66-.14 1.32-.14 2s.06 1.34.14 2M12 4.03c.83 1.2 1.5 2.54 1.91 3.97h-3.82c.41-1.43 1.08-2.77 1.91-3.97M18.92 8h-2.95a15.7 15.7 0 0 0-1.38-3.56c1.84.63 3.37 1.9 4.33 3.56M12 2C6.47 2 2 6.5 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10A10 10 0 0 0 12 2"/></svg> </a> <a href="https://www.opensanctions.org/contact/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 4H4c-1.11 0-2 .89-2 
2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2m-3 13H7v-2h10m0-2H7v-2h10m3-2h-3V6h3"/></svg> </a> <a href="https://www.opensanctions.org/slack/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M6 15a2 2 0 0 1-2 2 2 2 0 0 1-2-2 2 2 0 0 1 2-2h2zm1 0a2 2 0 0 1 2-2 2 2 0 0 1 2 2v5a2 2 0 0 1-2 2 2 2 0 0 1-2-2zm2-8a2 2 0 0 1-2-2 2 2 0 0 1 2-2 2 2 0 0 1 2 2v2zm0 1a2 2 0 0 1 2 2 2 2 0 0 1-2 2H4a2 2 0 0 1-2-2 2 2 0 0 1 2-2zm8 2a2 2 0 0 1 2-2 2 2 0 0 1 2 2 2 2 0 0 1-2 2h-2zm-1 0a2 2 0 0 1-2 2 2 2 0 0 1-2-2V5a2 2 0 0 1 2-2 2 2 0 0 1 2 2zm-2 8a2 2 0 0 1 2 2 2 2 0 0 1-2 2 2 2 0 0 1-2-2v-2zm0-1a2 2 0 0 1-2-2 2 2 0 0 1 2-2h5a2 2 0 0 1 2 2 2 2 0 0 1-2 2z"/></svg> </a> <a href="https://github.com/opensanctions/opensanctions" target="_blank" rel="noopener" title="github.com" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 2A10 10 0 0 0 2 12c0 4.42 2.87 8.17 6.84 9.5.5.08.66-.23.66-.5v-1.69c-2.77.6-3.36-1.34-3.36-1.34-.46-1.16-1.11-1.47-1.11-1.47-.91-.62.07-.6.07-.6 1 .07 1.53 1.03 1.53 1.03.87 1.52 2.34 1.07 2.91.83.09-.65.35-1.09.63-1.34-2.22-.25-4.55-1.11-4.55-4.92 0-1.11.38-2 1.03-2.71-.1-.25-.45-1.29.1-2.64 0 0 .84-.27 2.75 1.02.79-.22 1.65-.33 2.5-.33s1.71.11 2.5.33c1.91-1.29 2.75-1.02 2.75-1.02.55 1.35.2 2.39.1 2.64.65.71 1.03 1.6 1.03 2.71 0 3.82-2.34 4.66-4.57 4.91.36.31.69.92.69 1.85V21c0 .27.16.59.67.5C19.14 20.16 22 16.42 22 12A10 10 0 0 0 12 2"/></svg> </a> </div> </div> </div> </footer> </div> <div class="md-dialog" data-md-component="dialog"> <div class="md-dialog__inner md-typeset"></div> </div> <script id="__config" type="application/json">{"base": "..", "features": [], "search": "../assets/javascripts/workers/search.f8cc74c7.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": 
"# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script> <script src="../assets/javascripts/bundle.f1b6f286.min.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10