CINXE.COM

zavod

<!doctype html> <html lang="en" class="no-js"> <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width,initial-scale=1"> <link rel="next" href="install/"> <link rel="icon" href="https://assets.opensanctions.org/images/favicon-32x32.png"> <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.4"> <title>zavod</title> <link rel="stylesheet" href="assets/stylesheets/main.8608ea7d.min.css"> <link rel="stylesheet" href="assets/stylesheets/palette.06af60db.min.css"> <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback"> <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style> <link rel="stylesheet" href="assets/_mkdocstrings.css"> <script>__md_scope=new URL(".",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script> </head> <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="custom" data-md-color-accent="custom"> <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off"> <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off"> <label class="md-overlay" for="__drawer"></label> <div data-md-component="skip"> <a href="#zavod-data-factory" class="md-skip"> Skip to content </a> </div> <div data-md-component="announce"> </div> <header class="md-header md-header--shadow" data-md-component="header"> <nav class="md-header__inner md-grid" aria-label="Header"> <a href="." title="zavod" class="md-header__button md-logo" aria-label="zavod" data-md-component="logo"> <img src="https://assets.opensanctions.org/images/ura/logo_white.png" alt="logo"> </a> <label class="md-header__button md-icon" for="__drawer"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg> </label> <div class="md-header__title" data-md-component="header-title"> <div class="md-header__ellipsis"> <div class="md-header__topic"> <span class="md-ellipsis"> zavod </span> </div> <div class="md-header__topic" data-md-component="header-topic"> <span class="md-ellipsis"> Overview </span> </div> </div> </div> <div class="md-header__source"> <a href="https://github.com/opensanctions/opensanctions" title="Go to repository" class="md-source" data-md-component="source"> <div class="md-source__icon md-icon"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> </div> <div class="md-source__repository"> GitHub </div> </a> </div> </nav> </header> <div class="md-container" data-md-component="container"> <main class="md-main" data-md-component="main"> <div class="md-main__inner md-grid"> <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" > <div class="md-sidebar__scrollwrap"> <div class="md-sidebar__inner"> <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0"> <label class="md-nav__title" for="__drawer"> <a href="." title="zavod" class="md-nav__button md-logo" aria-label="zavod" data-md-component="logo"> <img src="https://assets.opensanctions.org/images/ura/logo_white.png" alt="logo"> </a> zavod </label> <div class="md-nav__source"> <a href="https://github.com/opensanctions/opensanctions" title="Go to repository" class="md-source" data-md-component="source"> <div class="md-source__icon md-icon"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg> </div> <div class="md-source__repository"> GitHub </div> </a> </div> <ul class="md-nav__list" data-md-scrollfix> <li class="md-nav__item md-nav__item--active"> <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc"> <label class="md-nav__link md-nav__link--active" for="__toc"> <span class="md-ellipsis"> Overview </span> <span class="md-nav__icon md-icon"></span> </label> <a href="." class="md-nav__link md-nav__link--active"> <span class="md-ellipsis"> Overview </span> </a> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> <label class="md-nav__title" for="__toc"> <span class="md-nav__icon md-icon"></span> Table of contents </label> <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> <li class="md-nav__item"> <a href="#getting-started" class="md-nav__link"> <span class="md-ellipsis"> Getting started </span> </a> </li> <li class="md-nav__item"> <a href="#further-references" class="md-nav__link"> <span class="md-ellipsis"> Further references </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="install/" class="md-nav__link"> <span class="md-ellipsis"> Installation </span> </a> </li> <li class="md-nav__item"> <a href="tutorial/" class="md-nav__link"> <span class="md-ellipsis"> Tutorial </span> </a> </li> <li class="md-nav__item"> <a href="usage/" class="md-nav__link"> <span class="md-ellipsis"> Command-line usage </span> </a> </li> <li class="md-nav__item"> <a href="metadata/" class="md-nav__link"> <span class="md-ellipsis"> Dataset metadata </span> </a> </li> <li class="md-nav__item"> <a href="context/" class="md-nav__link"> <span class="md-ellipsis"> Context </span> </a> </li> <li class="md-nav__item"> <a href="helpers/" class="md-nav__link"> <span class="md-ellipsis"> Helpers </span> </a> </li> <li class="md-nav__item md-nav__item--nested"> <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" > <label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0"> <span class="md-ellipsis"> Best practices </span> <span class="md-nav__icon md-icon"></span> </label> <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false"> <label class="md-nav__title" for="__nav_8"> <span class="md-nav__icon md-icon"></span> Best practices </label> <ul class="md-nav__list" data-md-scrollfix> <li class="md-nav__item"> <a href="best_practices/caching/" class="md-nav__link"> <span class="md-ellipsis"> Caching Considerations </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/patterns/" class="md-nav__link"> <span class="md-ellipsis"> Common Patterns </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/datapatch_lookups/" class="md-nav__link"> <span class="md-ellipsis"> Datapatch lookups </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/dates_meta/" class="md-nav__link"> <span class="md-ellipsis"> Date parsing with dataset metadata </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/http_operations/" class="md-nav__link"> <span class="md-ellipsis"> HTTP Operations </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/priorities/" class="md-nav__link"> <span class="md-ellipsis"> Data priorities </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/merge_checklist/" class="md-nav__link"> <span class="md-ellipsis"> Checklist when reviewing a crawler </span> </a> </li> <li class="md-nav__item"> <a href="best_practices/xpath_and_html/" class="md-nav__link"> <span class="md-ellipsis"> XPath and HTML </span> </a> </li> </ul> </nav> </li> <li class="md-nav__item"> <a href="peps/" class="md-nav__link"> <span class="md-ellipsis"> PEPs </span> </a> </li> <li class="md-nav__item"> <a href="wikidata/" class="md-nav__link"> <span class="md-ellipsis"> Wikidata </span> </a> </li> </ul> </nav> </div> </div> </div> <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" > <div class="md-sidebar__scrollwrap"> <div class="md-sidebar__inner"> <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> <label class="md-nav__title" for="__toc"> <span class="md-nav__icon md-icon"></span> Table of contents </label> <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> <li class="md-nav__item"> <a href="#getting-started" class="md-nav__link"> <span class="md-ellipsis"> Getting started </span> </a> </li> <li class="md-nav__item"> <a href="#further-references" class="md-nav__link"> <span class="md-ellipsis"> Further references </span> </a> </li> </ul> </nav> </div> </div> </div> <div class="md-content" data-md-component="content"> <article class="md-content__inner md-typeset"> <h1 id="zavod-data-factory">zavod Data Factory</h1> <p>This page contains documentation for the data processing framework used by <a href="https://www.opensanctions.org/">OpenSanctions</a>. It's called <code>zavod</code> (<a href="https://en.wikipedia.org/wiki/Zavod">name</a>) in order to distinguish it from some of the other software components used by the OpenSanctions project.</p> <p><code>zavod</code> provides a runtime context and a set of helpers for running crawler scripts that capture data from any online source, convert it to the <a href="https://followthemoney.tech"><code>followthemoney</code></a> data model, store the output and eventually produce the export files used by OpenSanctions' data consumers.</p> <h2 id="getting-started">Getting started</h2> <ul> <li><a href="install/">Installation</a> of zavod on your machine</li> <li><a href="tutorial/">Tutorial</a>: how to add a crawler</li> <li><a href="metadata/">Metadata</a>: how to write excellent dataset metadata</li> </ul> <h2 id="further-references">Further references</h2> <ul> <li><a href="https://www.opensanctions.org/docs/criteria/">Data inclusion criteria</a> - what data will be included in OpenSanctions?</li> <li><a href="https://www.opensanctions.org/docs/entities/">What is an entity?</a> - intro to the notion of data entities.</li> <li><a href="https://followthemoney.tech">FollowTheMoney</a> data model documentation, and the OpenSanctions <a href="https://www.opensanctions.org/reference/">data dictionary</a> (we use a subset of the schemata offered by FollowTheMoney).</li> <li><a href="https://www.opensanctions.org/docs/statements/">Statement-based data model</a> - the data model used by <code>zavod</code> during processing.</li> </ul> </article> </div> <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> </div> </main> <footer class="md-footer"> <div class="md-footer-meta md-typeset"> <div class="md-footer-meta__inner md-grid"> <div class="md-copyright"> Made with <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener"> Material for MkDocs </a> </div> <div class="md-social"> <a href="https://www.opensanctions.org/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M16.36 14c.08-.66.14-1.32.14-2s-.06-1.34-.14-2h3.38c.16.64.26 1.31.26 2s-.1 1.36-.26 2m-5.15 5.56c.6-1.11 1.06-2.31 1.38-3.56h2.95a8.03 8.03 0 0 1-4.33 3.56M14.34 14H9.66c-.1-.66-.16-1.32-.16-2s.06-1.35.16-2h4.68c.09.65.16 1.32.16 2s-.07 1.34-.16 2M12 19.96c-.83-1.2-1.5-2.53-1.91-3.96h3.82c-.41 1.43-1.08 2.76-1.91 3.96M8 8H5.08A7.92 7.92 0 0 1 9.4 4.44C8.8 5.55 8.35 6.75 8 8m-2.92 8H8c.35 1.25.8 2.45 1.4 3.56A8 8 0 0 1 5.08 16m-.82-2C4.1 13.36 4 12.69 4 12s.1-1.36.26-2h3.38c-.08.66-.14 1.32-.14 2s.06 1.34.14 2M12 4.03c.83 1.2 1.5 2.54 1.91 3.97h-3.82c.41-1.43 1.08-2.77 1.91-3.97M18.92 8h-2.95a15.7 15.7 0 0 0-1.38-3.56c1.84.63 3.37 1.9 4.33 3.56M12 2C6.47 2 2 6.5 2 12a10 10 0 0 0 10 10 10 10 0 0 0 10-10A10 10 0 0 0 12 2"/></svg> </a> <a href="https://www.opensanctions.org/contact/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 4H4c-1.11 0-2 .89-2 2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V6a2 2 0 0 0-2-2m-3 13H7v-2h10m0-2H7v-2h10m3-2h-3V6h3"/></svg> </a> <a href="https://www.opensanctions.org/slack/" target="_blank" rel="noopener" title="www.opensanctions.org" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M6 15a2 2 0 0 1-2 2 2 2 0 0 1-2-2 2 2 0 0 1 2-2h2zm1 0a2 2 0 0 1 2-2 2 2 0 0 1 2 2v5a2 2 0 0 1-2 2 2 2 0 0 1-2-2zm2-8a2 2 0 0 1-2-2 2 2 0 0 1 2-2 2 2 0 0 1 2 2v2zm0 1a2 2 0 0 1 2 2 2 2 0 0 1-2 2H4a2 2 0 0 1-2-2 2 2 0 0 1 2-2zm8 2a2 2 0 0 1 2-2 2 2 0 0 1 2 2 2 2 0 0 1-2 2h-2zm-1 0a2 2 0 0 1-2 2 2 2 0 0 1-2-2V5a2 2 0 0 1 2-2 2 2 0 0 1 2 2zm-2 8a2 2 0 0 1 2 2 2 2 0 0 1-2 2 2 2 0 0 1-2-2v-2zm0-1a2 2 0 0 1-2-2 2 2 0 0 1 2-2h5a2 2 0 0 1 2 2 2 2 0 0 1-2 2z"/></svg> </a> <a href="https://github.com/opensanctions/opensanctions" target="_blank" rel="noopener" title="github.com" class="md-social__link"> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 2A10 10 0 0 0 2 12c0 4.42 2.87 8.17 6.84 9.5.5.08.66-.23.66-.5v-1.69c-2.77.6-3.36-1.34-3.36-1.34-.46-1.16-1.11-1.47-1.11-1.47-.91-.62.07-.6.07-.6 1 .07 1.53 1.03 1.53 1.03.87 1.52 2.34 1.07 2.91.83.09-.65.35-1.09.63-1.34-2.22-.25-4.55-1.11-4.55-4.92 0-1.11.38-2 1.03-2.71-.1-.25-.45-1.29.1-2.64 0 0 .84-.27 2.75 1.02.79-.22 1.65-.33 2.5-.33s1.71.11 2.5.33c1.91-1.29 2.75-1.02 2.75-1.02.55 1.35.2 2.39.1 2.64.65.71 1.03 1.6 1.03 2.71 0 3.82-2.34 4.66-4.57 4.91.36.31.69.92.69 1.85V21c0 .27.16.59.67.5C19.14 20.16 22 16.42 22 12A10 10 0 0 0 12 2"/></svg> </a> </div> </div> </div> </footer> </div> <div class="md-dialog" data-md-component="dialog"> <div class="md-dialog__inner md-typeset"></div> </div> <script id="__config" type="application/json">{"base": ".", "features": [], "search": "assets/javascripts/workers/search.f8cc74c7.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script> <script src="assets/javascripts/bundle.f1b6f286.min.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10