<!-- CINXE.COM -->
<!doctype html> <html lang="en" dir="ltr" class="plugin-pages plugin-id-default" data-has-hydrated="false"> <head> <meta charset="UTF-8"> <meta name="generator" content="Docusaurus v3.0.0-rc.1"> <title data-rh="true">Is Web Scraping Ethical? | ScrapeOps</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" property="og:url" content="https://scrapeops.io/web-scraping-playbook/ethics-of-web-scraping/"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docusaurus_tag" content="default"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docsearch:docusaurus_tag" content="default"><meta data-rh="true" name="robots" content="max-image-preview:large"><meta data-rh="true" property="og:title" content="Is Web Scraping Ethical? | ScrapeOps"><meta data-rh="true" name="description" content="Is web scraping unethical? The case for web scraping and the case against, and how to be an ethical web scraper."><meta data-rh="true" property="og:description" content="Is web scraping unethical? 
The case for web scraping and the case against, and how to be an ethical web scraper."><meta data-rh="true" property="og:image" content="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-web-scraper-big.jpg"><meta data-rh="true" property="twitter:image" content="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-web-scraper-big.jpg"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" name="twitter:site" content="@ScrapeOps"><link data-rh="true" rel="icon" href="/img/favicon.svg"><link data-rh="true" rel="canonical" href="https://scrapeops.io/web-scraping-playbook/ethics-of-web-scraping/"><link data-rh="true" rel="alternate" href="https://scrapeops.io/web-scraping-playbook/ethics-of-web-scraping/" hreflang="en"><link data-rh="true" rel="alternate" href="https://scrapeops.io/web-scraping-playbook/ethics-of-web-scraping/" hreflang="x-default"><script data-rh="true" type="application/ld+json"> { "@context": "https://schema.org", "@type": "Article", "headline": "Is Web Scraping Ethical?", "description": "Is web scraping unethical? 
The case for web scraping and the case against, and how to be an ethical web scraper.", "image": "https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-web-scraper-big.jpg", "author": { "@type": "Person", "name": "Ian from ScrapeOps" }, "publisher": { "@type": "Organization", "name": "ScrapeOps", "logo": { "@type": "ImageObject", "url": "https://scrapeops.io/img/logo.png" } }, "datePublished": "2022-06-05T08:56:56Z", "dateModified": "2022-06-05T08:56:56Z", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://scrapeops.io/web-scraping-playbook/ethics-of-web-scraping/" } } </script><script data-rh="true" type="application/ld+json"> { "@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Home", "item": "https://scrapeops.io/"}, {"@type": "ListItem", "position": 2, "name": "Web Scraping Playbook", "item": "https://scrapeops.io/web-scraping-playbook/"}, {"@type": "ListItem", "position": 3, "name": "Ethics Of Web Scraping", "item": "https://scrapeops.io/web-scraping-playbook/ethics-of-web-scraping/"}] } </script><link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="ScrapeOps RSS Feed"> <link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="ScrapeOps Atom Feed"> <link rel="preconnect" href="https://www.google-analytics.com"> <link rel="preconnect" href="https://www.googletagmanager.com"> <script async src="https://www.googletagmanager.com/gtag/js?id=G-QJSW9S9YH4"></script> <script>function gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-QJSW9S9YH4",{anonymize_ip:!0})</script> <script src="/get-refferer.js"></script> <script src="https://cdn.firstpromoter.com/fpr.js" async></script><link rel="stylesheet" href="/assets/css/styles.0a6705c8.css"> <script src="/assets/js/runtime~main.75ee2ef1.js" defer="defer"></script> 
<script src="/assets/js/main.2eada272.js" defer="defer"></script> </head> <body class="navigation-with-keyboard"> <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const a=new URLSearchParams(window.location.search).entries();for(var[t,e]of a)if(t.startsWith("docusaurus-data-")){var n=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(n,e)}}catch(t){}}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_s0pr" style="background-color:#0d53d7;color:#fff" role="banner"><div class="announcementBarContent_dpRF">Need a proxy solution? 
Try ScrapeOps and get <a target="_self" href="https://scrapeops.io/app/register/proxy/">1,000 free requests here</a>, or compare all proxy providers <a target="_self" href="https://scrapeops.io/proxy-providers/comparison/">here</a>!</div></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/scrapeops-logo.svg" alt="ScrapeOps Logo" class="themedComponent_mlkZ themedComponent--light_NVdE" height="24px" width="18px"><img src="/img/scrapeops-logo.svg" alt="ScrapeOps Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU" height="24px" width="18px"></div><b class="navbar__title text--truncate">ScrapeOps</b></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Solutions</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/proxy-aggregator/">Proxy Aggregator</a></li><li><a class="dropdown__link" href="/monitoring-scheduling/">Monitoring & Scheduler</a></li></ul></div><a class="navbar__item navbar__link" href="/docs/intro/">Docs</a><a href="https://scrapeops.io/proxy-providers/comparison/" target="_self" rel="noopener noreferrer" class="navbar__item navbar__link">Proxy Comparison<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbar__item dropdown 
dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Guides</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/web-scraping-playbook/">Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/python-web-scraping-playbook/">Python Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/nodejs-web-scraping-playbook/">NodeJs Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/python-scrapy-playbook/">Python Scrapy Playbook</a></li><li><a class="dropdown__link" href="/selenium-web-scraping-playbook/">Selenium Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/puppeteer-web-scraping-playbook/">Puppeteer Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/playwright-web-scraping-playbook/">Playwright Web Scraping Playbook</a></li></ul></div><a href="https://scrapeops.io/app/login/" target="_self" rel="noopener noreferrer" class="navbar__item navbar__link">Login<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><a href="https://scrapeops.io/app/register/main/" target="_self" rel="noopener noreferrer" class="navbar__item navbar__link">Signup<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper mdx-wrapper"><main class="container container--fluid margin-vert--lg"><div class="row mdxPageWrapper_yV3F"><div class="col col--8"> <p><img 
src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-web-scraper-big.jpg" alt="The Web Scraping Playbook - The Ethics of Web Scraping" fetchpriority="high" loading="eager" class="top-header-img img_CujE"></p> <h1>Is Web Scraping Ethical?</h1> <p><strong>Question:</strong> <em>"Is web scraping ethical?"</em></p> <p><strong>Not:</strong> <em>"Is web scraping legal?"</em></p> <p>As that is a different question which brings you down the rabbit hole of court cases, data protection and copyright laws.</p> <p><strong>Just:</strong> <em>Is the act of scraping data from someone's website ethical?</em></p> <p>It is a simple question, but actually quite hard to get a consensus on an answer. Primarily because web scraping is so polarizing.</p> <p>Some people see web scraping as a <strong>sinister menace on the internet</strong>, stealing company data, impacting user experience for real users, and driving up infrastructure costs.</p> <p>Whilst others argue web scraping is simply the conversion of HTML data on public pages into more machine-readable data formats. 
Or tout <strong>web scraping's positive impacts</strong> as web scraping powers numerous products consumers and businesses love.</p> <p>This is a polarising topic, one rife with hypocrisy, so we're going to view it from both sides and explore:</p> <ul> <li><a href="#the-ethical-case-against-web-scraping">The Ethical Case <strong>Against</strong> Web Scraping</a></li> <li><a href="#the-ethical-case-for-web-scraping">The Ethical Case <strong>For</strong> Web Scraping</a></li> <li><a href="#the-ethical-hypocrisy-of-companies">The Ethical Hypocrisy Of Companies</a></li> <li><a href="#a-manifesto-for-the-responsible-scraper">A Manifesto For The Responsible Scraper</a></li> <li><a href="#be-a-truely-ethical-scraper">Be A Truly Ethical Scraper</a></li> <li><a href="#the-ethical-website-owner">The Ethical Website Owner</a></li> <li><a href="#verdict">Verdict</a></li> </ul> <p>Ultimately, suggesting some <strong>principles for an ethical web scraper</strong>.</p> <!-- --> <!-- --> <div style="background-color:#f6fafe;border-radius:2px;border:1px solid #f0f0f3;padding:1.5rem;padding-bottom:2.5rem;padding-top:2.5rem;text-align:center;margin-top:2rem"><h3>Need help scraping the web?</h3><p style="margin-bottom:2rem">Then check out <a href="https://scrapeops.io/?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=ethics-of-web-scraping" style="color:#000;text-decoration:none;border-bottom:2px dashed #0d53d7;font-weight:600">ScrapeOps</a>, the complete toolkit for web scraping.</p><div style="display:flex;flex-direction:row;flex-wrap:wrap;justify-content:center"><div><div><img style="height:80px;width:80px" src="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Icons/scrapeops-proxy-aggregator-icon.svg" alt="ScrapeOps Proxy Manager"></div><div><a href="https://scrapeops.io/proxy-aggregator/?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=ethics-of-web-scraping" 
style="color:#000;text-decoration:none;font-weight:600">Proxy Manager</a></div></div><div style="margin-left:3.5rem;margin-right:3.5rem"><div><img style="height:80px;width:80px" src="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Icons/scrapeops-monitoring-icon.svg" alt="ScrapeOps Monitoring"></div><div><a href="https://scrapeops.io/monitoring-scheduling/?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=ethics-of-web-scraping" style="color:#000;text-decoration:none;font-weight:600">Scraper Monitoring</a></div></div><div><div><img style="height:80px;width:80px" src="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Icons/scrapeops-scheduler-icon.svg" alt="ScrapeOps Job Scheduling"></div><div><a href="https://scrapeops.io/monitoring-scheduling?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=ethics-of-web-scraping" style="color:#000;text-decoration:none;font-weight:600">Job Scheduling</a></div></div></div></div> <hr> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-case-against-web-scraping.jpg" alt="The Web Scraping Playbook - The Ethical Case Against Web Scraping" loading="lazy" class="img_CujE"></p> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="the-ethical-case-against-web-scraping">The Ethical Case Against Web Scraping<a href="#the-ethical-case-against-web-scraping" class="hash-link" aria-label="Direct link to The Ethical Case Against Web Scraping" title="Direct link to The Ethical Case Against Web Scraping"></a></h2> <p>The case for why web scraping is unethical is pretty straightforward.</p> <p>If I'm a company, and I say in my <strong>robots.txt</strong>, and my <strong>Terms of Service</strong> that the automated scraping of my site is prohibited, and if you proceed to scrape my data, then doesn't that make web scraping my 
content unethical?</p> <p>A clause like this in someone's <strong>Terms of Service</strong> could not be more explicit:</p> <blockquote> <p>You may only use or reproduce the Content for your own personal and non-commercial use. The framing, scraping, data-mining, extraction or collection of the Content of the Sites in any form and by any means whatsoever is strictly prohibited. Furthermore, you may not mirror any material contained on this Sites.</p> </blockquote> <p>Here, I have clearly stated that the data I have assembled and published on my website should not be scraped, so <strong>why would it be ethical for anyone to do so?</strong></p> <p>Not only that, if someone proceeds to scrape my website and I ban their IP address with a message <strong>"web scraping is prohibited"</strong>, and they then resort to hiding their identity with proxies and fake user agents, they can be under no illusion that I don't want my data to be scraped.</p> <p>Isn't that proof enough as to why <strong>web scraping is unethical</strong>?</p> <hr> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-case-in-favour-of-web-scraping.jpg" alt="The Web Scraping Playbook - The Ethical Case For Web Scraping" loading="lazy" class="img_CujE"></p> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="the-ethical-case-for-web-scraping">The Ethical Case For Web Scraping<a href="#the-ethical-case-for-web-scraping" class="hash-link" aria-label="Direct link to The Ethical Case For Web Scraping" title="Direct link to The Ethical Case For Web Scraping"></a></h2> <p>The other perspective is that of the <strong>web scraper</strong>.</p> <p>Often their point of view is that the data has been published publicly on the open web, and all they are doing is converting HTML into machine-readable JSON.</p> <p>Bringing value to the world by using it to 
create new products that are a benefit to society as a whole, and oftentimes adding value to the original data owner's product ecosystem.</p> <p>In some circumstances, they may be violating the website's <strong>robots.txt</strong> or <strong>Terms of Service</strong>, but if the website has made the data public and the scraped data is providing value to society as a whole <strong>doesn't that make web scraping ethical?</strong></p> <p>Obviously, there are types of web scraping that break this argument and are very questionable ethically.</p> <ul> <li>Scraping personal information with the purpose of harming or annoying a person.</li> <li>Scraping data from websites and directly replicating it on their own website.</li> <li>Scalpers using bots to create multiple accounts on sneaker and ticketing websites, and having their bots take offers from real users. Then reselling them at higher prices.</li> <li>Spamming the comments of someone's blog, YouTube videos, or Twitter DMs, with untargeted advertising or the promotion of scams.</li> </ul> <p>And yes, web scraping with bad practices can put extra load on a website's servers, <strong>impacting user experience</strong> and <strong>increasing server costs</strong>.</p> <p>However, a responsible web scraper can mitigate all/some of these issues with a well-designed scraping architecture.</p> <p>And if the original publishers of the data provided a public API, or another way of consuming the data, then web scrapers would use that instead. 
Virtually removing the infrastructure burden.</p> <p><strong>Is this enough to redeem web scraping and make it ethical?</strong></p> <hr> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-ethical-hypocrisy.jpg" alt="The Web Scraping Playbook - The Ethical Hypocrisy Of Companies" loading="lazy" class="img_CujE"></p> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="the-ethical-hypocrisy-of-companies">The Ethical Hypocrisy Of Companies<a href="#the-ethical-hypocrisy-of-companies" class="hash-link" aria-label="Direct link to The Ethical Hypocrisy Of Companies" title="Direct link to The Ethical Hypocrisy Of Companies"></a></h2> <p>The ethical argument of web scraping is pretty straightforward when you are only on one side of the debate. Either you are a web scraper or a website being scraped.</p> <p>However, there is a <strong>real ethical hypocrisy</strong> for companies when they are both prohibiting/blocking scraping of their own content, and web scraping themselves.</p> <p><strong>How can a company argue that web scraping is prohibited, whilst the same company is actively scraping other websites themselves?</strong></p> <p>From the monitoring data we collect using <a href="https://scrapeops.io/" target="_blank" rel="noopener noreferrer">ScrapeOps</a>, our free web scraping monitoring tool, we can say with certainty that the majority of web scraping is targeted at a handful of large websites.</p> <p>And in the vast majority of cases, whilst these companies are trying to stop people scraping their websites, at the same time they are aggressively scraping their competitors' websites.</p> <p>If a company is ignoring the Terms of Service of another website and using advanced proxy solutions to scrape someone else's data, how can they argue that <strong>their own Terms of Service should be respected?</strong></p> <p>If a 
company wants to prohibit the scraping of their website, then <strong>shouldn't they ban the practice within their own companies as well?</strong></p> <p>But just because a company has hypocritical ethics, does that mean it is ethical to scrape their content?</p> <hr> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/web-scraping-playbook--the-manifesto-for-ethical-web-scrapers.jpg" alt="The Web Scraping Playbook - A Manifesto For The Ethical Scraper" loading="lazy" class="img_CujE"></p> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="a-manifesto-for-the-responsible-scraper">A Manifesto For The Responsible Scraper<a href="#a-manifesto-for-the-responsible-scraper" class="hash-link" aria-label="Direct link to A Manifesto For The Responsible Scraper" title="Direct link to A Manifesto For The Responsible Scraper"></a></h2> <p>There are so many entrenched views and incentives around web scraping, that it will likely be impossible to ever reach a <strong>web scraping code of ethics</strong> that everyone agrees upon.</p> <p>However, that doesn't mean that you as a web scraper shouldn't strive to act in an <strong>ethical</strong> and <strong>responsible</strong> manner.</p> <p>So if you want to be a <strong>responsible scraper</strong> (maybe not ethical in some people's eyes) here are some principles to live by:</p> <h4 class="anchor anchorWithStickyNavbar_LWe7" id="insprincipals-of-a-responsible-web-scraperins"><ins>Principles of a Responsible Web Scraper</ins><a href="#insprincipals-of-a-responsible-web-scraperins" class="hash-link" aria-label="Direct link to Principles of a Responsible Web Scraper" title="Direct link to Principles of a Responsible Web Scraper"></a></h4> <ul> <li>If you provide a public API to retrieve the data I require, I will use that instead of scraping.</li> <li>I will always endeavor to minimise the impact of my 
scraping by scraping at off-peak times and scraping at a reasonable rate.</li> <li>I will try to make my requests as light as possible on your servers. I will only use headless browsers when it is 100% necessary.</li> <li>If the data can be accessed via a hidden API endpoint, then I will use this instead of requesting the HTML response.</li> <li>I will only scrape the data that is essential for my requirements. If I can get the data I need from a product's shelf-page, I won't scrape individual product pages.</li> <li>I will scrape with the goal of creating value with your data, not simply duplicating it.</li> <li>I will respect your copyright, and not pass off your content as my own.</li> <li>If I am causing a burden to your website, I will make changes.</li> </ul> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="be-a-truely-ethical-scraper">Be A Truly Ethical Scraper<a href="#be-a-truely-ethical-scraper" class="hash-link" aria-label="Direct link to Be A Truly Ethical Scraper" title="Direct link to Be A Truly Ethical Scraper"></a></h2> <p>Some people might disagree with the statements above. Feeling <strong>they do not go far enough</strong>.</p> <p>And in certain circumstances they might be right.</p> <p>So if you would like to be a <strong>truly ethical web scraper</strong>, we can add the following principles:</p> <h4 class="anchor anchorWithStickyNavbar_LWe7" id="insprincipals-of-a-truely-ethical-web-scraperins"><ins>Principles of a Truly Ethical Web Scraper</ins><a href="#insprincipals-of-a-truely-ethical-web-scraperins" class="hash-link" aria-label="Direct link to Principles of a Truly Ethical Web Scraper" title="Direct link to Principles of a Truly Ethical Web Scraper"></a></h4> <ul> <li>I will obey your <strong>Robots.txt</strong> and <strong>Terms of Service</strong> at all times. 
If they forbid web scraping, then I will not scrape your website.</li> <li>If I really want your data, I will reach out beforehand and seek your permission before scraping your website.</li> <li>If I do scrape your website, I will clearly identify myself in the User Agent, and provide a way for you to contact me.</li> </ul> <p>The last point is important.</p> <p>If I am the owner of a website and I notice a surge in traffic that is impacting our website's performance, having a clear way to identify and contact a web scraper is of huge value to me.</p> <p>As web scrapers, we can make the website owner's job so much easier simply by identifying ourselves in our User-Agents, and giving them a way to contact us.</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string" style="color:rgb(173, 219, 103)">'user-agent'</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'Mozilla/5.0 (Windows NT 10.0; Win64; x64); John Doe (johndoe@gmail.com)'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" 
aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="the-ethical-website-owner">The Ethical Website Owner<a href="#the-ethical-website-owner" class="hash-link" aria-label="Direct link to The Ethical Website Owner" title="Direct link to The Ethical Website Owner"></a></h2> <p>Some might disagree, but <strong>ethics in web scraping is a two-way street</strong>.</p> <p>Whilst web scrapers have a duty to act ethically and responsibly, it can be argued that website owners have a duty too.</p> <p>Here are some principles for a <strong>website owner</strong> to live by:</p> <h4 class="anchor anchorWithStickyNavbar_LWe7" id="insprincipals-for-a-ethical-website-ownerins"><ins>Principles for an Ethical Website Owner</ins><a href="#insprincipals-for-a-ethical-website-ownerins" class="hash-link" aria-label="Direct link to Principles for an Ethical Website Owner" title="Direct link to Principles for an Ethical Website Owner"></a></h4> <ul> <li>I acknowledge the fact that scraping is a fact of life on the open web.</li> <li>If my data is in high demand, I will consider exposing a public API as an alternative to scraping.</li> <li>If someone uses transparent User Agents, I will reward their transparency and not block them unless they are being irresponsible.</li> <li>I won't block web scrapers unless they are being a burden to our website, and/or using the data in negative ways.</li> </ul> <hr> <h2 class="anchor 
anchorWithStickyNavbar_LWe7" id="verdict">Verdict<a href="#verdict" class="hash-link" aria-label="Direct link to Verdict" title="Direct link to Verdict"></a></h2> <p>Web scraping is a fact of life on the modern internet, and is unlikely to change soon.</p> <p>However, that doesn't mean that web scrapers don't have <strong>a duty to behave responsibly and ethically</strong>.</p> <p>They need to take into account the burden they place on the websites they scrape and the damage the data they scrape might do to others.</p> <p>Similarly, website owners have to acknowledge that if they provide valuable data for free on the public web, then web scraping is likely to occur.</p> <p>If website owners were to provide free (or paid) APIs to their data, then a lot of web scraping would go away.</p> <p>It is in nobody's interest to spend time and money building web scrapers/proxy networks, and trying to block them on the other side. It would be cheaper for everyone involved to have access to the data via APIs.</p> <p>If you would like to learn more about web scraping in general, then be sure to check out <a href="/web-scraping-playbook/">The Web Scraping Playbook</a>. 
Or check out some of our other popular articles like:</p> <ul> <li><a href="https://scrapeops.io/blog/the-state-of-web-scraping-2022/" target="_blank" rel="noopener noreferrer">The State of Web Scraping 2022</a></li> <li><a href="/web-scraping-playbook/what-is-web-scraping/">What is Web Scraping?</a></li> </ul></div><div class="col col--2"><div class="tableOfContents_bqdL thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#the-ethical-case-against-web-scraping" class="table-of-contents__link toc-highlight">The Ethical Case Against Web Scraping</a></li><li><a href="#the-ethical-case-for-web-scraping" class="table-of-contents__link toc-highlight">The Ethical Case For Web Scraping</a></li><li><a href="#the-ethical-hypocrisy-of-companies" class="table-of-contents__link toc-highlight">The Ethical Hypocrisy Of Companies</a></li><li><a href="#a-manifesto-for-the-responsible-scraper" class="table-of-contents__link toc-highlight">A Manifesto For The Responsible Scraper</a></li><li><a href="#be-a-truely-ethical-scraper" class="table-of-contents__link toc-highlight">Be A Truly Ethical Scraper</a></li><li><a href="#the-ethical-website-owner" class="table-of-contents__link toc-highlight">The Ethical Website Owner</a></li><li><a href="#verdict" class="table-of-contents__link toc-highlight">Verdict</a></li></ul></div></div></div></main></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Resources</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/intro/">Documentation</a></li><li class="footer__item"><a href="https://scrapeops.io/proxy-providers/comparison/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Proxy Comparison Tool</a></li><li class="footer__item"><a class="footer__link-item" href="/blog/">Blog</a></li><li class="footer__item"><a 
href="https://github.com/ScrapeOps" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li></ul></div><div class="col footer__col"><div class="footer__title">Web Scraping Guides</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/web-scraping-playbook/">Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/python-web-scraping-playbook/">Python Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/nodejs-web-scraping-playbook/">NodeJs Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/python-scrapy-playbook/">Python Scrapy Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/selenium-web-scraping-playbook/">Selenium Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/puppeteer-web-scraping-playbook/">Puppeteer Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/playwright-web-scraping-playbook/">Playwright Web Scraping Playbook</a></li></ul></div><div class="col footer__col"><div class="footer__title">Company</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/affiliate-program/">Affiliate Program</a></li><li class="footer__item"><a class="footer__link-item" href="/privacy-policy/">Privacy Policy</a></li><li class="footer__item"><a class="footer__link-item" href="/terms-of-service/">Terms Of Service</a></li><li class="footer__item"><a class="footer__link-item" href="/data-protection-policy/">Data Protection Policy</a></li><li class="footer__item"><a class="footer__link-item" href="/data-processing-agreement/">Data Processing Agreement</a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2024 ScrapeOps.</div></div></div></footer></div> </body> </html>