<!-- CINXE.COM -->
<!doctype html> <html lang="en" dir="ltr" class="plugin-pages plugin-id-default" data-has-hydrated="false"> <head> <meta charset="UTF-8"> <meta name="generator" content="Docusaurus v3.0.0-rc.1"> <title data-rh="true">How To Scrape Amazon.com Products & Reviews [2023] | ScrapeOps</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" property="og:url" content="https://scrapeops.io/web-scraping-playbook/how-to-scrape-amazon/"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docusaurus_tag" content="default"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docsearch:docusaurus_tag" content="default"><meta data-rh="true" name="robots" content="max-image-preview:large"><meta data-rh="true" property="og:title" content="How To Scrape Amazon.com Products & Reviews [2023] | ScrapeOps"><meta data-rh="true" name="description" content="Learn how to crawl Amazon.com for products and scrape products and reviews."><meta data-rh="true" property="og:description" content="Learn how to crawl Amazon.com for products and scrape products and reviews."><meta data-rh="true" property="og:image" content="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-search-page.png"><meta data-rh="true" property="twitter:image" content="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-search-page.png"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" name="twitter:site" content="@ScrapeOps"><link data-rh="true" rel="icon" href="/img/favicon.svg"><link data-rh="true" rel="canonical" href="https://scrapeops.io/web-scraping-playbook/how-to-scrape-amazon/"><link data-rh="true" rel="alternate" 
href="https://scrapeops.io/web-scraping-playbook/how-to-scrape-amazon/" hreflang="en"><link data-rh="true" rel="alternate" href="https://scrapeops.io/web-scraping-playbook/how-to-scrape-amazon/" hreflang="x-default"><script data-rh="true" type="application/ld+json"> { "@context": "https://schema.org", "@type": "Article", "headline": "How To Scrape Amazon.com Products & Reviews [2023]", "description": "Learn how to crawl Amazon.com for products and scrape products and reviews.", "image": "https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-search-page.png", "author": { "@type": "Person", "name": "Ian from ScrapeOps" }, "publisher": { "@type": "Organization", "name": "ScrapeOps", "logo": { "@type": "ImageObject", "url": "https://scrapeops.io/img/logo.png" } }, "datePublished": "2022-10-20T06:42:02Z", "dateModified": "2022-10-20T06:42:02Z", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://scrapeops.io/web-scraping-playbook/how-to-scrape-amazon/" } } </script><script data-rh="true" type="application/ld+json"> { "@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Home", "item": "https://scrapeops.io/"}, {"@type": "ListItem", "position": 2, "name": "Web Scraping Playbook", "item": "https://scrapeops.io/web-scraping-playbook/"}, {"@type": "ListItem", "position": 3, "name": "How To Scrape Amazon", "item": "https://scrapeops.io/web-scraping-playbook/how-to-scrape-amazon/"}] } </script><link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="ScrapeOps RSS Feed"> <link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="ScrapeOps Atom Feed"> <link rel="preconnect" href="https://www.google-analytics.com"> <link rel="preconnect" href="https://www.googletagmanager.com"> <script async src="https://www.googletagmanager.com/gtag/js?id=G-QJSW9S9YH4"></script> <script>function 
gtag(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],gtag("js",new Date),gtag("config","G-QJSW9S9YH4",{anonymize_ip:!0})</script> <script src="/get-refferer.js"></script> <script src="https://cdn.firstpromoter.com/fpr.js" async></script><link rel="stylesheet" href="/assets/css/styles.0a6705c8.css"> <script src="/assets/js/runtime~main.75ee2ef1.js" defer="defer"></script> <script src="/assets/js/main.2eada272.js" defer="defer"></script> </head> <body class="navigation-with-keyboard"> <script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const a=new URLSearchParams(window.location.search).entries();for(var[t,e]of a)if(t.startsWith("docusaurus-data-")){var n=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(n,e)}}catch(t){}}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_s0pr" style="background-color:#0d53d7;color:#fff" role="banner"><div class="announcementBarContent_dpRF">Need a proxy solution? 
Try ScrapeOps and get <a target="_self" href="https://scrapeops.io/app/register/proxy/">1,000 free requests here</a>, or compare all proxy providers <a target="_self" href="https://scrapeops.io/proxy-providers/comparison/">here</a>!</div></div><nav class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Navigation bar toggle" class="navbar__toggle clean-btn" type="button" tabindex="0"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/scrapeops-logo.svg" alt="ScrapeOps Logo" class="themedComponent_mlkZ themedComponent--light_NVdE" height="24px" width="18px"><img src="/img/scrapeops-logo.svg" alt="ScrapeOps Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU" height="24px" width="18px"></div><b class="navbar__title text--truncate">ScrapeOps</b></a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Solutions</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/proxy-aggregator/">Proxy Aggregator</a></li><li><a class="dropdown__link" href="/monitoring-scheduling/">Monitoring & Scheduler</a></li></ul></div><a class="navbar__item navbar__link" href="/docs/intro/">Docs</a><a href="https://scrapeops.io/proxy-providers/comparison/" target="_self" rel="noopener noreferrer" class="navbar__item navbar__link">Proxy Comparison<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbar__item dropdown 
dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Guides</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/web-scraping-playbook/">Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/python-web-scraping-playbook/">Python Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/nodejs-web-scraping-playbook/">NodeJs Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/python-scrapy-playbook/">Python Scrapy Playbook</a></li><li><a class="dropdown__link" href="/selenium-web-scraping-playbook/">Selenium Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/puppeteer-web-scraping-playbook/">Puppeteer Web Scraping Playbook</a></li><li><a class="dropdown__link" href="/playwright-web-scraping-playbook/">Playwright Web Scraping Playbook</a></li></ul></div><a href="https://scrapeops.io/app/login/" target="_self" rel="noopener noreferrer" class="navbar__item navbar__link">Login<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><a href="https://scrapeops.io/app/register/main/" target="_self" rel="noopener noreferrer" class="navbar__item navbar__link">Signup<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div class="main-wrapper mdx-wrapper"><main class="container container--fluid margin-vert--lg"><div class="row mdxPageWrapper_yV3F"><div class="col col--8"> <h1>How To Scrape Amazon.com Products & Reviews [2023]</h1> <p>In this guide for our 
<strong>"How To Scrape X"</strong> series, we're going to look at how to scrape <a href="https://www.amazon.com/" target="_blank" rel="noopener noreferrer">Amazon.com</a>.</p> <p><a href="https://www.amazon.com/" target="_blank" rel="noopener noreferrer">Amazon</a> is the most popular website for web scrapers, with billions of product pages being scraped every month.</p> <p>So in this guide we will go through:</p> <ul> <li><a href="#how-to-build-a-list-of-amazon-product-urls">How To Build A List Of Amazon Product URLs</a></li> <li><a href="#how-to-scrape-product-data-from-amazon-product-search-pages">How To Scrape Product Data From Amazon Product Search Pages</a></li> <li><a href="#how-to-scrape-product-data-from-amazon-product-pages">How To Scrape Product Data From Amazon Product Pages</a></li> <li><a href="#how-to-scrape-product-reviews-from-amazon-review-pages">How To Scrape Product Reviews From Amazon Review Pages</a></li> <li><a href="#bypassing-amazons-anti-bot-protection">Bypassing Amazon's Anti-Bot Protection</a></li> </ul> <!-- --> <!-- --> <div style="background-color:#f6fafe;border-radius:2px;border:1px solid #f0f0f3;padding:1.5rem;padding-bottom:2.5rem;padding-top:2.5rem;text-align:center;margin-top:2rem"><h3>Need help scraping the web?</h3><p style="margin-bottom:2rem">Then check out <a href="https://scrapeops.io/?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=how-to-scrape-amazon" style="color:#000;text-decoration:none;border-bottom:2px dashed #0d53d7;font-weight:600">ScrapeOps</a>, the complete toolkit for web scraping.</p><div style="display:flex;flex-direction:row;flex-wrap:wrap;justify-content:center"><div><div><img style="height:80px;width:80px" src="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Icons/scrapeops-proxy-aggregator-icon.svg" alt="ScrapeOps Proxy Manager"></div><div><a 
href="https://scrapeops.io/proxy-aggregator/?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=how-to-scrape-amazon" style="color:#000;text-decoration:none;font-weight:600">Proxy Manager</a></div></div><div style="margin-left:3.5rem;margin-right:3.5rem"><div><img style="height:80px;width:80px" src="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Icons/scrapeops-monitoring-icon.svg" alt="ScrapeOps Monitoring"></div><div><a href="https://scrapeops.io/monitoring-scheduling/?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=how-to-scrape-amazon" style="color:#000;text-decoration:none;font-weight:600">Scraper Monitoring</a></div></div><div><div><img style="height:80px;width:80px" src="https://assets-scrapeops.nyc3.digitaloceanspaces.com/Icons/scrapeops-scheduler-icon.svg" alt="ScrapeOps Job Scheduling"></div><div><a href="https://scrapeops.io/monitoring-scheduling?utm_campaign=web-scraping-playbook&utm_source=article&utm_medium=organic&utm_content=how-to-scrape-amazon" style="color:#000;text-decoration:none;font-weight:600">Job Scheduling</a></div></div></div></div> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="how-to-build-a-list-of-amazon-product-urls">How To Build A List Of Amazon Product URLs<a href="#how-to-build-a-list-of-amazon-product-urls" class="hash-link" aria-label="Direct link to How To Build A List Of Amazon Product URLs" title="Direct link to How To Build A List Of Amazon Product URLs"></a></h2> <p>The first part of scraping Amazon is designing a web crawler that will generate a list of product URLs for our scrapers to scrape.</p> <p>For example, here is a product URL for an <strong>iPad</strong>:</p> <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" 
style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/2021-Apple-10-2-inch-iPad-Wi-Fi/dp/B09G9FPHY6/ref=sr_1_1'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Alternatively Use Amazon ASINs</div><div class="admonitionContent_BuS1"><p>The alternative approach is to crawl Amazon for <strong>ASIN (Amazon Standard Identification Number) codes</strong>. 
Every product listed on Amazon has its own unique ASIN code, which you can use to construct URLs to scrape that product page, reviews, or other sellers.</p><p>For example, you can retrieve the product page of any product using its ASIN:</p><div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">## URL Structure</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/dp/ASIN'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">## Example</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/dp/B09G9FPHY6'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div></div> <p>With 
<strong>Amazon.com</strong> the easiest way to do this is to use the Amazon Search page which returns <strong>up to 20 products</strong> per page.</p> <p>For example, here is how we would get search results for <strong>iPads</strong>.</p> <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/s?k=iPads&page=1'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>This URL contains a number of parameters that we will explain:</p> <ul> <li><code>k</code> stands for the search keyword. In our case, <code>k=iPads</code>. <strong>Note:</strong> If you want to search for a keyword that contains spaces or special characters, remember that you need to URL-encode this value.</li> <li><code>page</code> stands for the page number. 
In our case, we've requested <code>page=1</code>.</li> </ul> <p>Using these parameters we can query the Amazon search endpoint to start building a list of URLs to scrape.</p> <p>Here is an example response page:</p> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-search-page.png" alt="How To Scrape Amazon.com Product Search Pages" fetchpriority="high" loading="eager" class="top-header-img img_CujE"></p> <p>To extract product URLs (and ASIN codes) from this page, we need to look through every product on this page, extract the relative URL to the product and then either create an absolute product URL or extract the ASIN.</p> <hr> <h3 class="anchor anchorWithStickyNavbar_LWe7" id="extracting-product-page-urls">Extracting Product Page URLs<a href="#extracting-product-page-urls" class="hash-link" aria-label="Direct link to Extracting Product Page URLs" title="Direct link to Extracting Product Page URLs"></a></h3> <p>Here is an example crawler that will extract product URLs from an <a href="https://www.amazon.com/s?k=iPads&page=1" target="_blank" rel="noopener noreferrer">Amazon Search Page</a> with <a href="https://requests.readthedocs.io/" target="_blank" rel="noopener noreferrer">Python Requests</a> & <a href="https://parsel.readthedocs.io/en/latest/index.html" target="_blank" rel="noopener noreferrer">Parsel</a>:</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> urljoin</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_urls </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">keyword_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'ipad'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token 
plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> keyword </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> keyword_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=1'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Extract Product Page URLs</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_products </span><span class="token operator" style="color:rgb(127, 219, 
202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"div.s-result-item[data-component-type=s-search-result]"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> search_products</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> relative_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a::attr(href)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> urljoin</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"?"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_urls</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 
202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <hr> <h3 class="anchor anchorWithStickyNavbar_LWe7" id="extracting-product-asins">Extracting Product ASINs<a href="#extracting-product-asins" class="hash-link" aria-label="Direct link to Extracting Product ASINs" title="Direct link to Extracting Product ASINs"></a></h3> <p>Here is an example where we extract product ASINs from the relative URL for the same <a href="https://www.amazon.com/s?k=iPads&page=1" target="_blank" rel="noopener noreferrer">Amazon Search Page</a> with <a href="https://requests.readthedocs.io/" target="_blank" rel="noopener noreferrer">Python Requests</a> & <a href="https://parsel.readthedocs.io/en/latest/index.html" target="_blank" rel="noopener noreferrer">Parsel</a>:</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" 
class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_asins </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">keyword_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'ipad'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> keyword </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> keyword_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=1'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token 
plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Extract Product ASINS</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_products </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"div.s-result-item[data-component-type=s-search-result]"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> search_products</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> relative_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a::attr(href)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> asin </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token 
punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(130, 170, 255)">len</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token operator" style="color:rgb(127, 219, 202)">>=</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">4</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">else</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_asins</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">asin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span 
class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <hr> <h3 class="anchor anchorWithStickyNavbar_LWe7" id="paginating-amazon-search-pages">Paginating Amazon Search Pages<a href="#paginating-amazon-search-pages" class="hash-link" aria-label="Direct link 
to Paginating Amazon Search Pages" title="Direct link to Paginating Amazon Search Pages"></a></h3> <p>The above code just scrapes the product URLs and ASINs from the first page of the Amazon Search results. However, most of the time we will want to scrape the data from the other available pages.</p> <p>To do so we will need to configure our crawler to paginate through every available page for our search keywords and extract the data from those as well.</p> <p>In the below example we are going to extract the available page numbers and configure our scraper to request those pages as well by adding them into our <code>url_list</code>.</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 
202)">import</span><span class="token plain"> urljoin</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_urls </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">keyword_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'ipad'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> keyword </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> keyword_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token 
string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=1'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> url </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span 
class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Extract Product Page URLs</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_products </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 
103)">"div.s-result-item[data-component-type=s-search-result]"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> search_products</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> relative_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a::attr(href)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> urljoin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> relative_url</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"?"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_urls</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Get All Pages</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"&page=1"</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> available_pages 
</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">xpath</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'//a[has-class("s-pagination-item")][not(has-class("s-pagination-separator"))]/text()'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">getall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> page </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> available_pages</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_url_paginated </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation 
punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">page</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">search_url_paginated</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 
103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="how-to-scrape-product-data-from-amazon-product-search-pages">How To Scrape Product Data From Amazon Product Search Pages<a href="#how-to-scrape-product-data-from-amazon-product-search-pages" class="hash-link" aria-label="Direct link to How To Scrape Product Data From Amazon Product Search Pages" title="Direct link to How To Scrape Product Data From Amazon Product Search Pages"></a></h2> <p>In the above examples, we saw how to extract product URLs and ASINs from Amazon Search pages so that we can scrape the product pages.</p> <p>However, if you only need basic product data (<strong>name</strong>, <strong>price</strong>, <strong>image URL</strong>, <strong>rating</strong>, <strong>number of reviews</strong>, etc) then you can scrape this data from the actual search pages.</p> <p>The advantage with this approach is that you will cut the number of requests you need to make to Amazon by a factor of 20. 
This makes your scrapers faster and cheaper to run.</p> <p>Here is an example using <a href="https://requests.readthedocs.io/" target="_blank" rel="noopener noreferrer">Python Requests</a> & <a href="https://parsel.readthedocs.io/en/latest/index.html" target="_blank" rel="noopener noreferrer">Parsel</a> that scrapes the product data from all available <a href="https://www.amazon.com/s?k=iPads&page=1" target="_blank" rel="noopener noreferrer">Amazon Search Pages</a>.</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> urljoin</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">keyword_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'ipad'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_overview_data </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> keyword </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> keyword_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation 
interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=1'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> url </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code 
</span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Extract Product Page</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_products </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"div.s-result-item[data-component-type=s-search-result]"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> 
</span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> search_products</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> relative_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a::attr(href)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">#print(relative_url.split('/'))</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> asin </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token 
punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(130, 170, 255)">len</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token operator" style="color:rgb(127, 219, 202)">>=</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">4</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">else</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> urljoin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 
146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"?"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_overview_data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"keyword"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> keyword</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"asin"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> asin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"url"</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"ad"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">True</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"/slredirect/"</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> product_url </span><span class="token keyword" style="color:rgb(127, 219, 202)">else</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">False</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"title"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a>span::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"price"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">".a-price[data-a-size=xl] .a-offscreen::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"real_price"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">".a-price[data-a-size=b] .a-offscreen::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[aria-label~=stars]::attr(aria-label)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">re</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">r"(\d+\.*\d*) out"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">or</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating_count"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[aria-label~=stars] + span::attr(aria-label)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"thumbnail_url"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">xpath</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">".//img[has-class('s-image')]/@src"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Get All Pages</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"&page=1"</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> available_pages </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">xpath</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 
103)">'//a[has-class("s-pagination-item")][not(has-class("s-pagination-separator"))]/text()'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">getall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> page </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> available_pages</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_url_paginated </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation 
interpolation">page</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">search_url_paginated</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div 
class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <div class="theme-admonition theme-admonition-tip admonition_xJq3 alert alert--success"><div class="admonitionHeading_Gvgb"><span class="admonitionIcon_Rf37"><svg viewBox="0 0 12 16"><path fill-rule="evenodd" d="M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"></path></svg></span>Web Scraping Ethics</div><div class="admonitionContent_BuS1"><p>If you can get away with only the product data available on the Amazon Search page then you should only scrape these pages. 
Going with this approach is more ethical as you will put less demand on the website's servers.</p></div></div> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="how-to-scrape-product-data-from-amazon-product-pages">How To Scrape Product Data From Amazon Product Pages<a href="#how-to-scrape-product-data-from-amazon-product-pages" class="hash-link" aria-label="Direct link to How To Scrape Product Data From Amazon Product Pages" title="Direct link to How To Scrape Product Data From Amazon Product Pages"></a></h2> <p>Once we have a list of Amazon product URLs then we can scrape all the product data from each individual Amazon product page.</p> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-product-page.png" alt="How To Scrape Amazon.com Product Pages" loading="lazy" class="img_CujE"></p> <p>Scraping product data from Amazon Product Pages is pretty straightforward.</p> <p>First we need to request the product page using either a full product URL like this:</p> <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/2021-Apple-10-2-inch-iPad-Wi-Fi/dp/B09G9FPHY6/ref=sr_1_1'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" 
class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>Or one based solely on the product's ASIN code:</p> <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/dp/B09G9FPHY6'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>Then create parsers for every field we want to extract data for.</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" 
style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> json</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> re</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> urljoin</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_urls </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/2021-Apple-10-2-inch-iPad-Wi-Fi/dp/B09G9FPHY6/ref=sr_1_1'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_data_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product_url </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> product_urls</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 
202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> image_data </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> json</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">.</span><span class="token plain">loads</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">re</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">findall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">r"colorImages':.*'initial':\s*(\[.+?\])},\n"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> variant_data </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> re</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">findall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">r'dimensionValuesDisplayData"\s*:\s* ({.+?}),\n'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token 
plain"> feature_bullets </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token plain">bullet</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">strip</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> bullet </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"#feature-bullets li ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">getall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> price </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'.a-price span[aria-hidden="true"] ::text'</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">not</span><span class="token plain"> price</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> price </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'.a-price .a-offscreen ::text'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_data_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 
146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"name"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"#productTitle::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">strip</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"price"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> price</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"stars"</span><span class="token 
punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"i[data-hook=average-star-rating] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">strip</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating_count"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"div[data-hook=total-review-count] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" 
style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">strip</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"feature_bullets"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> feature_bullets</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"images"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> image_data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"variant_data"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> variant_data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>In the above code, we scrape all the main product data from the page including product variant data.</p> <p>Here is an example output:</p> <div class="language-json codeBlockContainer_Ckt0 theme-code-block" 
style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">{"name": "Apple iPad 9.7inch with WiFi 32GB- Space Gray (2017 Model) (Renewed)",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "price": "$137.00",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "stars": "4.6 out of 5 stars",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating_count": "8,532 global ratings",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "feature_bullets": [</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "Make sure this fits by entering your model number.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "9.7-Inch Retina Display, wide Color and True Tone",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "A9 third-generation chip with 64-bit architecture",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "M9 motion coprocessor, 1.2MP FaceTime HD Camera",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "8MP insight Camera, touch ID, Apple Pay"],</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "images": [{"hiRes": "https://m.media-amazon.com/images/I/51dBcW+NXPL._AC_SL1000_.jpg",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "thumb": 
"https://m.media-amazon.com/images/I/51pGtRLfaZL._AC_US40_.jpg",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "large": "https://m.media-amazon.com/images/I/51pGtRLfaZL._AC_.jpg",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "main": {...},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "variant": "MAIN",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "lowRes": None,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "shoppableScene": None},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"hiRes": "https://m.media-amazon.com/images/I/51c43obovcL._AC_SL1000_.jpg",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "thumb": "https://m.media-amazon.com/images/I/415--n36L8L._AC_US40_.jpg",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "large": "https://m.media-amazon.com/images/I/415--n36L8L._AC_.jpg",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "main": {...},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "variant": "PT01",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "lowRes": None,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "shoppableScene": None},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "variant_data": ["{`B074PXZ5GC`:[`9.7 inches`,`Wi-Fi`,`Silver`],`B00TJGN4NG`:[`16GB`,`Wi-Fi`,`White`],`B07F93611L`:[`5 Pack`,`Wi-Fi`,`Space grey`],`B074PWW6NS`:[`Refurbished`,`Wi-Fi`,`Black`],`B0725LCLYQ`:[`9.7`,`Wi-Fi`,`Space Gray`],`B07D3DDJ4L`:[`32GB`,`Wi-Fi`,`Space 
Gray`],`B07G9N7J3S`:[`32GB`,`Wi-Fi`,`Gold`]}"]}</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>However, you can easily expand this to scrape other data like <strong>delivery times</strong>, <strong>product specs</strong>, etc.</p> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="how-to-scrape-product-reviews-from-amazon-review-pages">How To Scrape Product Reviews From Amazon Review Pages<a href="#how-to-scrape-product-reviews-from-amazon-review-pages" class="hash-link" aria-label="Direct link to How To Scrape Product Reviews From Amazon Review Pages" title="Direct link to How To Scrape Product Reviews From Amazon Review Pages"></a></h2> <p>Another popular type of data to scrape from Amazon is product reviews.</p> <p>To request a product reviews page, you just need the product's ASIN code and the following URL format:</p> <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain">'https://www.amazon.com/product-reviews/B09G9FPHY6/'</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-product-reviews-page.png" alt="How To Scrape Amazon.com Product Review Pages" loading="lazy" class="img_CujE"></p> <p>The following code scrapes the product reviews for the target product.</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token 
keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> urljoin</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">reviews </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_review_url_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/product-reviews/B09G9FPHY6/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span 
class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product_review_url </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> product_review_url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product_review_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> 
</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Parse Product Reviews</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> review_elements </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"#cm_cr-review_list div.review"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> review_element </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> review_elements</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span 
class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> reviews</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">join</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[data-hook=review-body] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">getall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">strip</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 
234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"title"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"*[data-hook=review-title]>span::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"location_and_date"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[data-hook=review-date] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"verified"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(130, 170, 255)">bool</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[data-hook=avp-badge] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"*[data-hook*=review-star-rating] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">re</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">r"(\d+\.*\d*) out"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>The output of this code will look like this:</p> <div class="language-json codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-json codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">[{"text": "Ok..little old lady here, whose working life consisted of nothing but years and years of Windows, android phones, etc. Just in last several years jumped hesitantly into Apple (phone, Ipad mini, etc.)LOVE LOVE LOVE my iPad mini but, thought..might be time to think about replacement..so, I saw the great price on this 10 inch tablet and thought Id take a chance. I am much more partial to the mini sized tablets, but thought Id go for it...soooo, even after reading all the bu.......t comments here, thought Id try, if i didnt like it., Id return it. 1. Delivered on time, yayyy! 2. 
Package well protected, sealed, unblemished...perfect condition (and yeah..no fingerprints on screen) 3. Ipad fired right up...70% charged 4. Ipad immediately began transferring info from iPhone that was sitting nearby. Yayyyyy!! No need for reams of books, booklets, warnings, etc., etc.!! 5. EVERYTHING transferred from iPhone and IPad Mini...and I still had some 15 gig storage left on new 64 gig iPad (just remember ...this is for my entertainment...not for work with diagrams, idiotic work related emails about cleaning up my workspace, or 20 specs for items no one will ever use) 6. Did a test run...everything worked exactly as I required, expected. 7. Ultimate test...watched old Morse/Poirot shows I have in Prime..excellent quality! love love love 8. After 8 full hours...I had to recharge for a bit before I went to bed. (charged fairly fast!)sooooooo...Im keeping this jewel!!!!!Risk is there...evidently, if you believe the nutso crowd and their comments here. Its a GREAT item, its a fabulous deal, Christmas is coming...or if you need to have a worthy backup..,...DO IT!!!!",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "EXCELLENT buy!",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 10, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "If you’re anything like me you want something to watch shows on in you living room or bed, but you don’t like the TV, and your phone is too small. 
Well this is the perfect thing for you, the screen is just the right size and very crisp and clear(maybe better then my iPhone X), the responsiveness is excellent, and all of the streaming sites work with this perfectly. On top of that, my AirPods automatically switch between this and my phone, so I don’t have to worry about messing with the settings every time. However, the camera is only OK. And it feels very delicate, so I would pick up a case and get AppleCare+. The battery isn’t the best either, but should be enough to get through the day. Overall I definitely recommend this, especially for the price.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Perfect",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 13, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "My old IPad was acting up, wouldn’t hold a charge etc. This iPad arrived the very next day after I ordered it. What a great surprise. The one corner of the outer box it arrived in was damaged, but the inner box containing the iPad was in perfect condition. It was so simple to transfer everything from my old iPad to this one, just laid the new one on the old (iPad 2019) and it did pretty much everything on its own. 
I am very pleased with my purchase, I hope it lasts longer than my 2019 model.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Great purchase",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 15, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "Im not much of an apple product person but I do buy them for people I dont want to provide tech support to. (Parents, In-laws, Wife, and Kids)I used to use the fire tablets because they were cheap and I thought that would keep the kids entertained, especially on road trips. This worked for movies and some games but there were always problems with how slow they become with updates, loss of battery life, etc.This ipad was a game changer. I always knew they were the best tablets but I was also a bit in denial as well as just being somewhat anti-apple. With this on sale during prime day 2022 (July) I took a chance and bought one for the kids.This does everything as well or better (usually better) than previous tablets I had purchased because they were cheaper.I also didnt buy a case for it and my kids are brutal with these types of devices. 
To date, it is still in one piece, operational, and has no cracks in the screen.Sometimes it is worth paying a bit more for the name brand product and in this case Im a believer.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Kids love it",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 2, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "For those who wonder, this is brand new in the box, 2021 9th generation. It is NOT refurbed or an exchange. It is never opened and shrink wrapped by Apple. (See my photos.) The reason it is so much cheaper than the other 2021 iPads is the 64gb storage. But with iCloud so ridiculously cheap for cloud storage, I just cannot see this 64gb as not getting the job done. I myself was curious about this low price buying me a refurb/exchange, but that is simply not the case here. I do, however, recommend you not go with 32gb. I believe even with an iCloud account, you will be sorry you didn’t go 64gb.And the ease of setting this up cannot be understated. I simply sat my iPhone 13 Pro Max next to it and all relevant files and Wi-Fi passwords were transferred over with no input from me. It looked to me that it will do that with Android and most laptops also, though I did not test that out. All photos also came over, and the ones I took after that transfer, I simply Air-Dropped them into this iPad. All in all, this is as simple as it gets for transferring files and photos. Apple has this stuff down to a science, believe me.This screen is incredible. 
If you are looking at a pre-Retina screen, you will be amazed at this 2021 version. This thing is very fast, the on screen keyboard is fast, accurate and very concise. Dealing with apps is easy, and Apple doesn’t load you down with bloat you’ll never use. It is claimed this has about 12 hours on a charge; what I’ve seen thus far leads me to believe that is accurate.All in all, I am extremely pleased with this purchase. You can’t always say you got what you paid for. But I can definitely say that with this. This is the entry level 2021 9th generation iPad, and it is exactly what I need. Go and get you one…",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Incredible deal on incredible machine",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on September 21, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "I bought this for my husband. He loves it! It is the gift that really does keep on giving. It arrived quickly, well packaged and I didn’t have to leave my house to get it. It was great to use my iPad to purchase this one as a gift and have it arrive safe.y. 
Thank you, Amazon!",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Best gift 🎁",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 15, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "I have had an ipad air since they came out. I used hotel points to get it and its served me well as a book and simple internet use. Recently I noticed that it was no updating and some of my favorite apps were telling me they were using an old version because my IOS was outdated. Without being able to update it I decided to pass my old one on and get a new one. Then I thought Id get a mini 6 but after comparing the prices and the ability I could not justify a double price for it. I ordered this Ipad 9 and it came quicker than expected. Out of the box it performs much better than my old one, screen appears clearer and I like the new IOS it uses. My old one will live on as a small tv for my wife when shes in the kitchen and for that it does very well. I have no complaints about my new one. 
Its easy to talk yourself into the top of the curve, but sometimes being a bit behind it makes better fiscal sense",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "My old Ipad was too old to update, so it was passed down,",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on September 25, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "I have always been an android user. I finally dipped my toe into Apple. There is a learning curve, I do not speak Apple. Thankfully I have grandchildren and they have taught me a lot. Dont snooze on this one, I love it, fast, images clearer, pics, videos, pen, everything about this one is great. 
I now get the Apple craze.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Perfect size and performance",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 13, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "El iPad es una tableta muy fácil de usar y muy práctica puedes hacer casi todo lo que necesitas en el día a día, oficina, escuela, entretenimiento, productividad, y con 256gb tengo para almacenar mucha información.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "El iPad es la mejor tableta que existe",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 14, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"},</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> {"text": "Thought I was gonna get a knock off for the price but came brand new, no problems what so ever. 
Amazing battery life I charge it every two days and use it constantly at school and work for studying and job demands.",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "title": "Excellent product",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "location_and_date": "Reviewed in the United States 🇺🇸 on October 14, 2022",</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "verified": true,</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> "rating": "5.0"}]</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <hr> <h3 class="anchor anchorWithStickyNavbar_LWe7" id="paginating-product-review-pages">Paginating Product Review Pages<a href="#paginating-product-review-pages" class="hash-link" aria-label="Direct link to Paginating Product Review Pages" title="Direct link to Paginating Product Review Pages"></a></h3> <p>The above code works, but it only extracts the product reviews from a single Amazon reviews page.</p> <p>However, we can extend the scraper to paginate through all of the product review pages: on each page we check whether there is a link to a next page and, if there is, add that URL to the list of pages to scrape.</p> <div class="language-python 
codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> urljoin</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">reviews </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_review_url_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/product-reviews/B09G9FPHY6/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product_review_url </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> product_review_url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product_review_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Get Next Page Url</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> next_page_relative_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">".a-pagination .a-last>a::attr(href)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> next_page_relative_url </span><span class="token keyword" style="color:rgb(127, 219, 202)">is</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">not</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> next_page </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> urljoin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> next_page_relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_review_url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">(</span><span class="token plain">next_page</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Parse Product Reviews</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> review_elements </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"#cm_cr-review_list div.review"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> review_element </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> review_elements</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> reviews</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">""</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">join</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[data-hook=review-body] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">getall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">strip</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"title"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"*[data-hook=review-title]>span::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"location_and_date"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[data-hook=review-date] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"verified"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(130, 170, 255)">bool</span><span class="token 
punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[data-hook=avp-badge] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> review_element</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"*[data-hook*=review-star-rating] ::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">re</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">r"(\d+\.*\d*) out"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" 
style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path 
fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="bypassing-amazons-anti-bot-protection">Bypassing Amazon's Anti-Bot Protection<a href="#bypassing-amazons-anti-bot-protection" class="hash-link" aria-label="Direct link to Bypassing Amazon's Anti-Bot Protection" title="Direct link to Bypassing Amazon's Anti-Bot Protection"></a></h2> <p>As you might have seen already, if you run this code Amazon might block you and return an error page like this:</p> <p><img src="https://res.cloudinary.com/dyaskan9k/image/fetch/f_auto,q_auto/https://assets-scrapeops.nyc3.digitaloceanspaces.com/Images/Playbooks/Web-Scraping-Playbook/how-to-scrape-amazon/how-to-scrape-amazon-blocked-page.png" alt="How To Scrape Amazon.com - Error Page" loading="lazy" class="img_CujE"></p> <p>This is because Amazon uses anti-bot protection to try to prevent developers from scraping their site (or at least make it harder).</p> <p>Amazon outright blocks any requests that identify themselves as coming from an HTTP client, like <a href="https://requests.readthedocs.io/" target="_blank" rel="noopener noreferrer">Python Requests</a> in this case.</p> <p>For example, when you make a request with <strong>Python Requests</strong> it sends the following user-agent with the request (this is why the above code examples won't work).</p> <div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span 
class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">'User-Agent': 'python-requests/2.26.0',</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>This user agent clearly identifies your requests as being made by the Python Requests library, so Amazon can easily block you from scraping the site.</p> <p>Even if you do use a <strong>fake User-Agent</strong>, Amazon analyses the other headers to check if they match the typical headers a browser would send.</p> <p>That is why you should be sending full browser headers when scraping Amazon. 
And if you are scraping at scale you will need to spread your requests over hundreds if not thousands of proxies to hide your scraper's identity.</p> <p>We have written about how to solve these challenges here:</p> <ul> <li><a href="/web-scraping-playbook/web-scraping-without-getting-blocked/">Guide to Web Scraping Without Getting Blocked</a></li> <li><a href="/web-scraping-playbook/web-scraping-guide-header-user-agents/">Web Scraping Guide: Headers & User-Agents Optimization Checklist</a></li> <li><a href="/python-web-scraping-playbook/python-fake-user-agents/">Python Fake User-Agents: How to Manage User Agents When Scraping</a></li> <li><a href="/python-web-scraping-playbook/python-requests-proxy-rotation/">Python Requests: How to Use & Rotate Proxies</a></li> </ul> <p>However, if you don't want to implement all this anti-bot bypassing logic yourself, the easier option is to use a smart proxy solution like <a href="/proxy-aggregator/">ScrapeOps Proxy Aggregator</a>.</p> <p>The <strong>ScrapeOps Proxy Aggregator</strong> is a smart proxy that handles everything for you:</p> <ul> <li>Proxy rotation & selection</li> <li>Rotating user-agents & browser headers</li> <li>Ban detection & CAPTCHA bypassing</li> <li>Country IP geotargeting</li> <li>JavaScript rendering with headless browsers</li> </ul> <p>To use the <strong>ScrapeOps Proxy Aggregator</strong>, we just need to send the URL we want to scrape to the Proxy API instead of making the request directly ourselves. 
We can do this with a simple wrapper function:</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">SCRAPEOPS_API_KEY </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'YOUR_API_KEY'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">scrapeops_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> payload </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string" style="color:rgb(173, 219, 103)">'api_key'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> SCRAPEOPS_API_KEY</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'url'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'country'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'us'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> proxy_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'https://proxy.scrapeops.io/v1/?'</span><span class="token plain"> </span><span class="token operator" style="color:rgb(127, 219, 202)">+</span><span class="token plain"> urlencode</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">payload</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">return</span><span class="token plain"> proxy_url</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">amazon_url </span><span class="token 
operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/s?k=iPads&page=1'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Send URL To ScrapeOps Instead of Amazon </span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">scrapeops_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">amazon_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" 
d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>You can get an <strong>API key</strong> with <strong>1,000 free API credits</strong> by signing up <a href="https://scrapeops.io/app/register/proxy/" target="_blank" rel="noopener noreferrer">here</a>.</p> <p>Here is our <strong>Amazon Search Crawler</strong>, updated to use the <strong>ScrapeOps Proxy</strong>:</p> <div class="language-python codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#d6deeb;--prism-background-color:#011627"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-python codeBlock_bY9V thin-scrollbar" style="color:#d6deeb;background-color:#011627"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> requests</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> parsel </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> Selector</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">from</span><span class="token plain"> urllib</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">parse </span><span class="token keyword" style="color:rgb(127, 219, 202)">import</span><span class="token plain"> urlencode</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> urljoin</span><br></span><span class="token-line" 
style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">API_KEY </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'YOUR_API_KEY'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">def</span><span class="token plain"> </span><span class="token function" style="color:rgb(130, 170, 255)">scrapeops_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> payload </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string" style="color:rgb(173, 219, 103)">'api_key'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> API_KEY</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'url'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 
219, 103)">'country'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'us'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> proxy_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'https://proxy.scrapeops.io/v1/?'</span><span class="token plain"> </span><span class="token operator" style="color:rgb(127, 219, 202)">+</span><span class="token plain"> urlencode</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">payload</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">return</span><span class="token plain"> proxy_url</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">keyword_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string" style="color:rgb(173, 219, 103)">'ipad'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain">product_overview_data </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> keyword </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> keyword_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url_list </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=1'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> url </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span 
class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">try</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> response </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> requests</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">scrapeops_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">status_code </span><span class="token operator" style="color:rgb(127, 219, 202)">==</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">200</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> sel </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> Selector</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">(</span><span class="token plain">text</span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain">response</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">text</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" style="color:rgb(99, 119, 119);font-style:italic">## Extract Product Data From Search Page</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_products </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"div.s-result-item[data-component-type=s-search-result]"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> product </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> search_products</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> relative_url </span><span class="token operator" style="color:rgb(127, 219, 
202)">=</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a::attr(href)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> asin </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">3</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token builtin" style="color:rgb(130, 170, 255)">len</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 
219, 103)">'/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token operator" style="color:rgb(127, 219, 202)">>=</span><span class="token plain"> </span><span class="token number" style="color:rgb(247, 140, 108)">4</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">else</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_url </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> urljoin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">'https://www.amazon.com/'</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> relative_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">split</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"?"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> product_overview_data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span 
class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"keyword"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> keyword</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"asin"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> asin</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"url"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product_url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"ad"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">True</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 
103)">"/slredirect/"</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> product_url </span><span class="token keyword" style="color:rgb(127, 219, 202)">else</span><span class="token plain"> </span><span class="token boolean" style="color:rgb(255, 88, 116)">False</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> </span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"title"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"h2>a>span::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"price"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">".a-price[data-a-size=xl] 
.a-offscreen::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"real_price"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">".a-price[data-a-size=b] .a-offscreen::text"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" 
style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[aria-label~=stars]::attr(aria-label)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">re</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">r"(\d+\.*\d*) out"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">or</span><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token boolean" style="color:rgb(255, 88, 116)">None</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">[</span><span class="token number" style="color:rgb(247, 140, 108)">0</span><span class="token punctuation" style="color:rgb(199, 146, 234)">]</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"rating_count"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">css</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"span[aria-label~=stars] + span::attr(aria-label)"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token 
punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"thumbnail_url"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"> product</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">xpath</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"//img[has-class('s-image')]/@src"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">get</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token comment" 
style="color:rgb(99, 119, 119);font-style:italic">## Get All Pages</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">if</span><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">"&page=1"</span><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> url</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> available_pages </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> sel</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">xpath</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token string" style="color:rgb(173, 219, 103)">'//a[has-class("s-pagination-item")][not(has-class("s-pagination-separator"))]/text()'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">getall</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> 
</span><span class="token keyword" style="color:rgb(127, 219, 202)">for</span><span class="token plain"> page </span><span class="token keyword" style="color:rgb(127, 219, 202)">in</span><span class="token plain"> available_pages</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> search_url_paginated </span><span class="token operator" style="color:rgb(127, 219, 202)">=</span><span class="token plain"> </span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">f'https://www.amazon.com/s?k=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">keyword</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">&page=</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">{</span><span class="token string-interpolation interpolation">page</span><span class="token string-interpolation interpolation punctuation" style="color:rgb(199, 146, 234)">}</span><span class="token string-interpolation string" style="color:rgb(173, 219, 103)">'</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> url_list</span><span class="token punctuation" style="color:rgb(199, 146, 234)">.</span><span class="token plain">append</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">search_url_paginated</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> 
</span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">except</span><span class="token plain"> Exception </span><span class="token keyword" style="color:rgb(127, 219, 202)">as</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">:</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"> </span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token string" style="color:rgb(173, 219, 103)">"Error"</span><span class="token punctuation" style="color:rgb(199, 146, 234)">,</span><span class="token plain"> e</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain"></span><span class="token keyword" style="color:rgb(127, 219, 202)">print</span><span class="token punctuation" style="color:rgb(199, 146, 234)">(</span><span class="token plain">product_overview_data</span><span class="token punctuation" style="color:rgb(199, 146, 234)">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#d6deeb"><span class="token plain" style="display:inline-block"></span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 
2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div> <p>Now when we make requests with our scraper, Amazon won't block them.</p> <hr> <h2 class="anchor anchorWithStickyNavbar_LWe7" id="more-web-scraping-guides">More Web Scraping Guides<a href="#more-web-scraping-guides" class="hash-link" aria-label="Direct link to More Web Scraping Guides" title="Direct link to More Web Scraping Guides"></a></h2> <p>In this edition of our <strong>"How To Scrape X" series</strong>, we went through how you can scrape <strong>Amazon.com</strong>, including how to bypass its anti-bot protection.</p> <p>If you would like to learn how to scrape other popular websites then check out our other <strong>How To Scrape Guides</strong>:</p> <ul> <li><a href="/web-scraping-playbook/how-to-scrape-walmart/">How To Scrape Walmart.com</a></li> <li><a href="/web-scraping-playbook/how-to-scrape-indeed/">How To Scrape Indeed.com</a></li> </ul> <p>Or if you would like to learn more about web scraping in general, then be sure to check out <a href="/web-scraping-playbook/">The Web Scraping Playbook</a>, or check out one of our more in-depth guides:</p> <ul> <li><a href="https://scrapeops.io/blog/the-state-of-web-scraping-2022/" target="_blank" rel="noopener noreferrer">The State of Web Scraping 2022</a></li> <li><a href="/web-scraping-playbook/ethics-of-web-scraping/">The Ethics of Web Scraping</a></li> </ul></div><div class="col col--2"><div class="tableOfContents_bqdL thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#how-to-build-a-list-of-amazon-product-urls" class="table-of-contents__link toc-highlight">How To Build A List Of Amazon Product URLs</a><ul><li><a href="#extracting-product-page-urls" class="table-of-contents__link toc-highlight">Extracting Product Page URLs</a></li><li><a 
href="#extracting-product-asins" class="table-of-contents__link toc-highlight">Extracting Product ASINs</a></li><li><a href="#paginating-amazon-search-pages" class="table-of-contents__link toc-highlight">Paginating Amazon Search Pages</a></li></ul></li><li><a href="#how-to-scrape-product-data-from-amazon-product-search-pages" class="table-of-contents__link toc-highlight">How To Scrape Product Data From Amazon Product Search Pages</a></li><li><a href="#how-to-scrape-product-data-from-amazon-product-pages" class="table-of-contents__link toc-highlight">How To Scrape Product Data From Amazon Product Pages</a></li><li><a href="#how-to-scrape-product-reviews-from-amazon-review-pages" class="table-of-contents__link toc-highlight">How To Scrape Product Reviews From Amazon Review Pages</a><ul><li><a href="#paginating-product-review-pages" class="table-of-contents__link toc-highlight">Paginating Product Review Pages</a></li></ul></li><li><a href="#bypassing-amazons-anti-bot-protection" class="table-of-contents__link toc-highlight">Bypassing Amazon's Anti-Bot Protection</a></li><li><a href="#more-web-scraping-guides" class="table-of-contents__link toc-highlight">More Web Scraping Guides</a></li></ul></div></div></div></main></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Resources</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/intro/">Documentation</a></li><li class="footer__item"><a href="https://scrapeops.io/proxy-providers/comparison/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Proxy Comparison Tool</a></li><li class="footer__item"><a class="footer__link-item" href="/blog/">Blog</a></li><li class="footer__item"><a href="https://github.com/ScrapeOps" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub</a></li></ul></div><div class="col 
footer__col"><div class="footer__title">Web Scraping Guides</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/web-scraping-playbook/">Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/python-web-scraping-playbook/">Python Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/nodejs-web-scraping-playbook/">NodeJs Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/python-scrapy-playbook/">Python Scrapy Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/selenium-web-scraping-playbook/">Selenium Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/puppeteer-web-scraping-playbook/">Puppeteer Web Scraping Playbook</a></li><li class="footer__item"><a class="footer__link-item" href="/playwright-web-scraping-playbook/">Playwright Web Scraping Playbook</a></li></ul></div><div class="col footer__col"><div class="footer__title">Company</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/affiliate-program/">Affiliate Program</a></li><li class="footer__item"><a class="footer__link-item" href="/privacy-policy/">Privacy Policy</a></li><li class="footer__item"><a class="footer__link-item" href="/terms-of-service/">Terms Of Service</a></li><li class="footer__item"><a class="footer__link-item" href="/data-protection-policy/">Data Protection Policy</a></li><li class="footer__item"><a class="footer__link-item" href="/data-processing-agreement/">Data Processing Agreement</a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2024 ScrapeOps.</div></div></div></footer></div> </body> </html>