[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"project-10":3},{"id":4,"name":5,"fullName":6,"owner":7,"repo":5,"description":8,"homepage":9,"htmlUrl":10,"language":11,"languages":9,"totalLinesOfCode":9,"stars":12,"forks":13,"watchers":14,"openIssues":15,"contributorsCount":9,"subscribersCount":16,"size":16,"stars1d":17,"stars7d":18,"stars30d":19,"stars90d":16,"forks30d":16,"starsTrendScore":20,"compositeScore":21,"rankGlobal":9,"rankLanguage":9,"license":9,"archived":22,"fork":22,"defaultBranch":23,"hasWiki":22,"hasPages":22,"topics":24,"createdAt":9,"pushedAt":9,"updatedAt":45,"readmeContent":46,"aiSummary":47,"trendingCount":16,"starSnapshotCount":16,"syncStatus":15,"lastSyncTime":48,"discoverSource":49},10,"Scrapling","D4Vinci\u002FScrapling","D4Vinci","🕷️ An adaptive Web Scraping framework that handles everything from a single request to a full-scale crawl!",null,"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling","Python",63013,6141,232,2,0,247,2738,14503,1314,117,false,"main",[25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44],"crawler","crawling","crawling-python","playwright","python","scraping","selectors","stealth","web-scraper","web-scraping","web-scraping-python","webscraping","xpath","automation","ai","ai-scraping","data","data-extraction","mcp","mcp-server","2026-06-11 04:00:16","\u003C!-- mcp-name: io.github.D4Vinci\u002FScrapling -->\n\n\u003Ch1 align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\">\n        \u003Cpicture>\n          \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fdocs\u002Fassets\u002Fcover_dark.svg?sanitize=true\">\n          \u003Cimg alt=\"Scrapling Poster\" src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fdocs\u002Fassets\u002Fcover_light.svg?sanitize=true\">\n        \u003C\u002Fpicture>\n    \u003C\u002Fa>\n    \u003Cbr>\n    
\u003Csmall>Effortless Web Scraping for the Modern Web\u003C\u002Fsmall>\n\u003C\u002Fh1>\n\n\u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Ftrendshift.io\u002Frepositories\u002F14244\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Ftrendshift.io\u002Fapi\u002Fbadge\u002Frepositories\u002F14244\" alt=\"D4Vinci%2FScrapling | Trendshift\" style=\"width: 250px; height: 55px;\" width=\"250\" height=\"55\"\u002F>\u003C\u002Fa>\n    \u003Cbr\u002F>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_AR.md\">العربيه\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_ES.md\">Español\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_PT_BR.md\">Português (Brasil)\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_FR.md\">Français\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_DE.md\">Deutsch\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_CN.md\">简体中文\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_JP.md\">日本語\u003C\u002Fa> |  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_RU.md\">Русский\u003C\u002Fa> | \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fdocs\u002FREADME_KR.md\">한국어\u003C\u002Fa>\n    \u003Cbr\u002F>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Factions\u002Fworkflows\u002Ftests.yml\" alt=\"Tests\">\n        \u003Cimg alt=\"Tests\" 
src=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Factions\u002Fworkflows\u002Ftests.yml\u002Fbadge.svg\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fbadge.fury.io\u002Fpy\u002FScrapling\" alt=\"PyPI version\">\n        \u003Cimg alt=\"PyPI version\" src=\"https:\u002F\u002Fbadge.fury.io\u002Fpy\u002FScrapling.svg\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fclickpy.clickhouse.com\u002Fdashboard\u002Fscrapling\" rel=\"nofollow\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fdm\u002Fscrapling\" alt=\"PyPI package downloads\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Ftree\u002Fmain\u002Fagent-skill\" alt=\"AI Agent Skill directory\">\n        \u003Cimg alt=\"Static Badge\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FSkill-black?style=flat&label=Agent&link=https%3A%2F%2Fgithub.com%2FD4Vinci%2FScrapling%2Ftree%2Fmain%2Fagent-skill\">\u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fclawhub.ai\u002FD4Vinci\u002Fscrapling-official\" alt=\"OpenClaw Skill\">\n        \u003Cimg alt=\"OpenClaw Skill\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FClawhub-darkred?style=flat&label=OpenClaw&link=https%3A%2F%2Fclawhub.ai%2FD4Vinci%2Fscrapling-official\">\u003C\u002Fa>\n    \u003Cbr\u002F>\n    \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FEMgGbDceNQ\" alt=\"Discord\" target=\"_blank\">\n      \u003Cimg alt=\"Discord\" src=\"https:\u002F\u002Fimg.shields.io\u002Fdiscord\u002F1360786381042880532?style=social&logo=discord&link=https%3A%2F%2Fdiscord.gg%2FEMgGbDceNQ\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fx.com\u002FScrapling_dev\" alt=\"X (formerly Twitter)\">\n      \u003Cimg alt=\"X (formerly Twitter) Follow\" src=\"https:\u002F\u002Fimg.shields.io\u002Ftwitter\u002Ffollow\u002FScrapling_dev?style=social&logo=x&link=https%3A%2F%2Fx.com%2FScrapling_dev\">\n    \u003C\u002Fa>\n    \u003Cbr\u002F>\n    \u003Ca 
href=\"https:\u002F\u002Fpypi.org\u002Fproject\u002Fscrapling\u002F\" alt=\"Supported Python versions\">\n        \u003Cimg alt=\"Supported Python versions\" src=\"https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fpyversions\u002Fscrapling.svg\">\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002Fparsing\u002Fselection.html\">\u003Cstrong>Selection methods\u003C\u002Fstrong>\u003C\u002Fa>\n    &middot;\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002Ffetching\u002Fchoosing.html\">\u003Cstrong>Fetchers\u003C\u002Fstrong>\u003C\u002Fa>\n    &middot;\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002Fspiders\u002Farchitecture.html\">\u003Cstrong>Spiders\u003C\u002Fstrong>\u003C\u002Fa>\n    &middot;\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002Fspiders\u002Fproxy-blocking.html\">\u003Cstrong>Proxy Rotation\u003C\u002Fstrong>\u003C\u002Fa>\n    &middot;\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002Fcli\u002Foverview.html\">\u003Cstrong>CLI\u003C\u002Fstrong>\u003C\u002Fa>\n    &middot;\n    \u003Ca href=\"https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002Fai\u002Fmcp-server.html\">\u003Cstrong>MCP\u003C\u002Fstrong>\u003C\u002Fa>\n\u003C\u002Fp>\n\nScrapling is an adaptive Web Scraping framework that handles everything from a single request to a full-scale crawl.\n\nIts parser learns from website changes and automatically relocates your elements when pages update. Its fetchers bypass anti-bot systems like Cloudflare Turnstile out of the box. And its spider framework lets you scale up to concurrent, multi-session crawls with pause\u002Fresume and automatic proxy rotation - all in a few lines of Python. One library, zero compromises.\n\nBlazing fast crawls with real-time stats and streaming. 
Built by Web Scrapers for Web Scrapers and regular users, there's something for everyone.\n\n```python\nfrom scrapling.fetchers import Fetcher, AsyncFetcher, StealthyFetcher, DynamicFetcher\nStealthyFetcher.adaptive = True\np = StealthyFetcher.fetch('https:\u002F\u002Fexample.com', headless=True, network_idle=True)  # Fetch website under the radar!\nproducts = p.css('.product', auto_save=True)                                        # Scrape data that survives website design changes!\nproducts = p.css('.product', adaptive=True)                                         # Later, if the website structure changes, pass `adaptive=True` to find them!\n```\nOr scale up to full crawls\n```python\nfrom scrapling.spiders import Spider, Response\n\nclass MySpider(Spider):\n  name = \"demo\"\n  start_urls = [\"https:\u002F\u002Fexample.com\u002F\"]\n\n  async def parse(self, response: Response):\n      for item in response.css('.product'):\n          yield {\"title\": item.css('h2::text').get()}\n\nMySpider().start()\n```\n\n\u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fdataimpulse.com\u002F?utm_source=scrapling&utm_medium=banner&utm_campaign=scrapling\" target=\"_blank\" style=\"display:flex; justify-content:center; padding:4px 0;\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FDataImpulse.png\" alt=\"At DataImpulse, we specialize in developing custom proxy services for your business. 
Make requests from anywhere, collect data, and enjoy fast connections with our premium proxies.\" style=\"max-height:60px;\">\n    \u003C\u002Fa>\n\u003C\u002Fp>\n\n# Platinum Sponsors\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fcoldproxy.com\u002F\" target=\"_blank\" title=\"Residential, IPv6 & Datacenter Proxies for Web Scraping\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fcoldproxy.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd> \u003Ca href=\"https:\u002F\u002Fcoldproxy.com\u002F\" target=\"_blank\">\u003Cb>ColdProxy\u003C\u002Fb>\u003C\u002Fa> provides residential and datacenter proxies for stable web scraping, public data collection, and geo-targeted testing across 195+ countries.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fhypersolutions.co\u002F?utm_source=github&utm_medium=readme&utm_campaign=scrapling\" target=\"_blank\" title=\"Bot Protection Bypass API for Akamai, DataDome, Incapsula & Kasada\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FHyperSolutions.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd> Scrapling handles Cloudflare Turnstile. For enterprise-grade protection, \u003Ca href=\"https:\u002F\u002Fhypersolutions.co?utm_source=github&utm_medium=readme&utm_campaign=scrapling\">\n        \u003Cb>Hyper Solutions\u003C\u002Fb>\n      \u003C\u002Fa> provides API endpoints that generate valid antibot tokens for \u003Cb>Akamai\u003C\u002Fb>, \u003Cb>DataDome\u003C\u002Fb>, \u003Cb>Kasada\u003C\u002Fb>, and \u003Cb>Incapsula\u003C\u002Fb>. Simple API calls, no browser automation required. 
\u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fbirdproxies.com\u002Ft\u002Fscrapling\" target=\"_blank\" title=\"At Bird Proxies, we eliminate your pains such as banned IPs, geo restriction, and high costs so you can focus on your work.\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FBirdProxies.jpg\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>Hey, we built \u003Ca href=\"https:\u002F\u002Fbirdproxies.com\u002Ft\u002Fscrapling\">\n        \u003Cb>BirdProxies\u003C\u002Fb>\n      \u003C\u002Fa> because proxies shouldn't be complicated or overpriced. Fast residential and ISP proxies in 195+ locations, fair pricing, and real support. \u003Cbr \u002F>\n      \u003Cb>Try our FlappyBird game on the landing page for free data!\u003C\u002Fb>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fevomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling\" target=\"_blank\" title=\"Evomi is your Swiss Quality Proxy Provider, starting at $0.49\u002FGB\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fevomi.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fevomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling\">\n        \u003Cb>Evomi\u003C\u002Fb>\n      \u003C\u002Fa>: residential proxies from $0.49\u002FGB. Scraping browser with fully spoofed Chromium, residential IPs, auto CAPTCHA solving, and anti-bot bypass. \u003Cbr \u002F>\n      \u003Cb>Scraper API for hassle-free results. 
MCP and N8N integrations are available.\u003C\u002Fb>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Ftikhub.io\u002F?utm_source=github.com\u002FD4Vinci\u002FScrapling&utm_medium=marketing_social&utm_campaign=retargeting&utm_content=carousel_ad\" target=\"_blank\" title=\"Unlock the Power of Social Media Data & AI\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FTikHub.jpg\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Ftikhub.io\u002F?utm_source=github.com\u002FD4Vinci\u002FScrapling&utm_medium=marketing_social&utm_campaign=retargeting&utm_content=carousel_ad\" target=\"_blank\">TikHub.io\u003C\u002Fa> provides 900+ stable APIs across 16+ platforms including TikTok, X, YouTube & Instagram, with 40M+ datasets. \u003Cbr \u002F> Also offers \u003Ca href=\"https:\u002F\u002Fai.tikhub.io\u002F?ref=KarimShoair\" target=\"_blank\">DISCOUNTED AI models\u003C\u002Fa> - Claude, GPT, GEMINI & more up to 71% off.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fwww.nsocks.com\u002F?keyword=2p67aivg\" target=\"_blank\" title=\"Scalable Web Data Access for AI Applications\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fnsocks.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n    \u003Ca href=\"https:\u002F\u002Fwww.nsocks.com\u002F?keyword=2p67aivg\" target=\"_blank\">Nsocks\u003C\u002Fa> provides fast Residential and ISP proxies for developers and scrapers. Global IP coverage, high anonymity, smart rotation, and reliable performance for automation and data extraction. 
Use \u003Ca href=\"https:\u002F\u002Fwww.xcrawl.com\u002F?keyword=2p67aivg\" target=\"_blank\">Xcrawl\u003C\u002Fa> to simplify large-scale web crawling.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fpetrosky.io\u002Fd4vinci\" target=\"_blank\" title=\"PetroSky delivers cutting-edge VPS hosting.\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fpetrosky.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n    Close your laptop. Your scrapers keep running. \u003Cbr \u002F>\n    \u003Ca href=\"https:\u002F\u002Fpetrosky.io\u002Fd4vinci\" target=\"_blank\">PetroSky VPS\u003C\u002Fa> - cloud servers built for nonstop automation. Windows and Linux machines with full control. From €6.99\u002Fmo.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fsubstack.thewebscraping.club\u002Fp\u002Fscrapling-hands-on-guide?utm_source=github&utm_medium=repo&utm_campaign=scrapling\" target=\"_blank\" title=\"The #1 newsletter dedicated to Web Scraping\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FTWSC.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n    Read a full review of \u003Ca href=\"https:\u002F\u002Fsubstack.thewebscraping.club\u002Fp\u002Fscrapling-hands-on-guide?utm_source=github&utm_medium=repo&utm_campaign=scrapling\" target=\"_blank\">Scrapling on The Web Scraping Club\u003C\u002Fa> (Nov 2025), the #1 newsletter dedicated to Web Scraping.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"http:\u002F\u002Fmangoproxy.com\u002F?utm_source=D4Vinci&utm_medium=GitHub&utm_campaign=D4Vinci\" target=\"_blank\" title=\"Proxies You Can Rely On: Residential, Server, and Mobile\">\n        \u003Cimg 
src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FMangoProxy.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n    \u003Ca href=\"http:\u002F\u002Fmangoproxy.com\u002F?utm_source=D4Vinci&utm_medium=GitHub&utm_campaign=D4Vinci\" target=\"_blank\">Stable proxies\u003C\u002Fa> for scraping, automation, and multi-accounting. Clean IPs, fast response, and reliable performance under load. Built for scalable workflows.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"200\">\n      \u003Ca href=\"https:\u002F\u002Fwww.swiftproxy.net\u002F?ref=D4Vinci\" target=\"_blank\" title=\"Scalable Solutions for Web Data Access\">\n        \u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FSwiftProxy.png\">\n      \u003C\u002Fa>\n    \u003C\u002Ftd>\n    \u003Ctd>\n    \u003Ca href=\"https:\u002F\u002Fwww.swiftproxy.net\u002F?ref=D4Vinci\" target=\"_blank\">Swiftproxy\u003C\u002Fa> provides scalable residential proxies with 80M+ IPs across 195+ countries, delivering fast, reliable connections, automatic rotation, and strong anti-block performance. Free trial available.\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n\u003Ci>\u003Csub>Do you want to show your ad here? 
Click [here](https:\u002F\u002Fgithub.com\u002Fsponsors\u002FD4Vinci\u002Fsponsorships?tier_id=586646)\u003C\u002Fsub>\u003C\u002Fi>\n# Sponsors \n\n\u003C!-- sponsors -->\n\u003Ca href=\"https:\u002F\u002Fwww.crawleo.dev\u002F?utm_source=github&utm_medium=sponsor&utm_campaign=scrapling\" target=\"_blank\" title=\"Supercharge your AI with Real-Time Web Intelligence\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fcrawleo.png\">\u003C\u002Fa>\n\u003Cbr\u002F>\n\n\u003Ca href=\"https:\u002F\u002Fserpapi.com\u002F?utm_source=scrapling\" target=\"_blank\" title=\"Scrape Google and other search engines with SerpApi\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FSerpApi.png\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fvisit.decodo.com\u002FDy6W0b\" target=\"_blank\" title=\"Try the Most Efficient Residential Proxies for Free\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fdecodo.png\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fhasdata.com\u002F?utm_source=github&utm_medium=banner&utm_campaign=D4Vinci\" target=\"_blank\" title=\"The web scraping service that actually beats anti-bot systems!\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fhasdata.png\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fproxyempire.io\u002F?ref=scrapling&utm_source=scrapling\" target=\"_blank\" title=\"Collect The Data Your Project Needs with the Best Residential Proxies\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FProxyEmpire.png\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fwww.webshare.io\u002F?referral_code=48r2m2cd5uz1\" target=\"_blank\" title=\"The Most Reliable Proxy with Unparalleled Performance\">\u003Cimg 
src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fwebshare.png\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fwww.rapidproxy.io\u002F?ref=d4v\" target=\"_blank\" title=\"Affordable Access to the Proxy World – bypass CAPTCHAs blocks, and avoid additional costs.\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Frapidproxy.jpg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fwww.ipfoxy.com\u002F?r=scrapling\" target=\"_blank\" title=\"Unlock the Full Potential of Global Business with IPFoxy's High-Quality Rotating and Dedicated Proxy Services.\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FIPFoxy.jpg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fwww.ipcook.com\u002F?ref=EAENO9&utm_source=github&utm_medium=referral&utm_campaign=d4vinci_scrapling\" target=\"_blank\" title=\"Fast Proxies. Smart Pricing. Premium Performance.\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002FIPCook.png\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fproxiware.com\u002F?ref=scrapling\" target=\"_blank\" title=\"Collect Any Data. At Any Scale.\">\u003Cimg src=\"https:\u002F\u002Fraw.githubusercontent.com\u002FD4Vinci\u002FScrapling\u002Fmain\u002Fimages\u002Fproxiware.png\">\u003C\u002Fa>\n\n\n\u003C!-- \u002Fsponsors -->\n\n\u003Ci>\u003Csub>Do you want to show your ad here? 
Click [here](https:\u002F\u002Fgithub.com\u002Fsponsors\u002FD4Vinci) and choose the tier that suits you!\u003C\u002Fsub>\u003C\u002Fi>\n\n---\n\n## Key Features\n\n### Spiders - A Full Crawling Framework\n- 🕷️ **Scrapy-like Spider API**: Define spiders with `start_urls`, async `parse` callbacks, and `Request`\u002F`Response` objects.\n- ⚡ **Concurrent Crawling**: Configurable concurrency limits, per-domain throttling, and download delays.\n- 🔄 **Multi-Session Support**: Unified interface for HTTP requests, and stealthy headless browsers in a single spider - route requests to different sessions by ID.\n- 💾 **Pause & Resume**: Checkpoint-based crawl persistence. Press Ctrl+C for a graceful shutdown; restart to resume from where you left off.\n- 📡 **Streaming Mode**: Stream scraped items as they arrive via `async for item in spider.stream()` with real-time stats - ideal for UI, pipelines, and long-running crawls.\n- 🛡️ **Blocked Request Detection**: Automatic detection and retry of blocked requests with customizable logic.\n- 🤖 **Robots.txt Compliance**: Optional `robots_txt_obey` flag that respects `Disallow`, `Crawl-delay`, and `Request-rate` directives with per-domain caching.\n- 🧪 **Development Mode**: Cache responses to disk on the first run and replay them on subsequent runs - iterate on your `parse()` logic without re-hitting the target servers.\n- 📦 **Built-in Export**: Export results through hooks and your own pipeline or the built-in JSON\u002FJSONL with `result.items.to_json()` \u002F `result.items.to_jsonl()` respectively.\n\n### Advanced Websites Fetching with Session Support\n- **HTTP Requests**: Fast and stealthy HTTP requests with the `Fetcher` class. 
Can impersonate browsers' TLS fingerprint, headers, and use HTTP\u002F3.\n- **Dynamic Loading**: Fetch dynamic websites with full browser automation through the `DynamicFetcher` class supporting Playwright's Chromium and Google's Chrome.\n- **Anti-bot Bypass**: Advanced stealth capabilities with `StealthyFetcher` and fingerprint spoofing. Can easily bypass all types of Cloudflare's Turnstile\u002FInterstitial with automation.\n- **Session Management**: Persistent session support with `FetcherSession`, `StealthySession`, and `DynamicSession` classes for cookie and state management across requests.\n- **Proxy Rotation**: Built-in `ProxyRotator` with cyclic or custom rotation strategies across all session types, plus per-request proxy overrides.\n- **Domain & Ad Blocking**: Block requests to specific domains (and their subdomains) or enable built-in ad blocking (~3,500 known ad\u002Ftracker domains) in browser-based fetchers.\n- **DNS Leak Prevention**: Optional DNS-over-HTTPS support to route DNS queries through Cloudflare's DoH, preventing DNS leaks when using proxies.\n- **Async Support**: Complete async support across all fetchers and dedicated async session classes.\n\n### Adaptive Scraping & AI Integration\n- 🔄 **Smart Element Tracking**: Relocate elements after website changes using intelligent similarity algorithms.\n- 🎯 **Smart Flexible Selection**: CSS selectors, XPath selectors, filter-based search, text search, regex search, and more.\n- 🔍 **Find Similar Elements**: Automatically locate elements similar to found elements.\n- 🤖 **MCP Server to be used with AI**: Built-in MCP server for AI-assisted Web Scraping and data extraction. The MCP server features powerful, custom capabilities that leverage Scrapling to extract targeted content before passing it to the AI (Claude\u002FCursor\u002Fetc), thereby speeding up operations and reducing costs by minimizing token usage. 
([demo video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qyFk3ZNwOxE))\n\n### High-Performance & battle-tested Architecture\n- 🚀 **Lightning Fast**: Optimized performance outperforming most Python scraping libraries.\n- 🔋 **Memory Efficient**: Optimized data structures and lazy loading for a minimal memory footprint.\n- ⚡ **Fast JSON Serialization**: 10x faster than the standard library.\n- 🏗️ **Battle tested**: Not only does Scrapling have 92% test coverage and full type hints coverage, but it has been used daily by hundreds of Web Scrapers over the past year.\n\n### Developer\u002FWeb Scraper Friendly Experience\n- 🎯 **Interactive Web Scraping Shell**: Optional built-in IPython shell with Scrapling integration, shortcuts, and new tools to speed up Web Scraping scripts development, like converting curl requests to Scrapling requests and viewing requests results in your browser.\n- 🚀 **Use it directly from the Terminal**: Optionally, you can use Scrapling to scrape a URL without writing a single line of code!\n- 🛠️ **Rich Navigation API**: Advanced DOM traversal with parent, sibling, and child navigation methods.\n- 🧬 **Enhanced Text Processing**: Built-in regex, cleaning methods, and optimized string operations.\n- 📝 **Auto Selector Generation**: Generate robust CSS\u002FXPath selectors for any element.\n- 🔌 **Familiar API**: Similar to Scrapy\u002FBeautifulSoup with the same pseudo-elements used in Scrapy\u002FParsel.\n- 📘 **Complete Type Coverage**: Full type hints for excellent IDE support and code completion. 
The entire codebase is automatically scanned with **PyRight** and **MyPy** with each change.\n- 🔋 **Ready Docker image**: With each release, a Docker image containing all browsers is automatically built and pushed.\n\n## Getting Started\n\nLet's give you a quick glimpse of what Scrapling can do without deep diving.\n\n### Basic Usage\nHTTP requests with session support\n```python\nfrom scrapling.fetchers import Fetcher, FetcherSession\n\nwith FetcherSession(impersonate='chrome') as session:  # Use latest version of Chrome's TLS fingerprint\n    page = session.get('https:\u002F\u002Fquotes.toscrape.com\u002F', stealthy_headers=True)\n    quotes = page.css('.quote .text::text').getall()\n\n# Or use one-off requests\npage = Fetcher.get('https:\u002F\u002Fquotes.toscrape.com\u002F')\nquotes = page.css('.quote .text::text').getall()\n```\nAdvanced stealth mode\n```python\nfrom scrapling.fetchers import StealthyFetcher, StealthySession\n\nwith StealthySession(headless=True, solve_cloudflare=True) as session:  # Keep the browser open until you finish\n    page = session.fetch('https:\u002F\u002Fnopecha.com\u002Fdemo\u002Fcloudflare', google_search=False)\n    data = page.css('#padded_content a').getall()\n\n# Or use one-off request style, it opens the browser for this request, then closes it after finishing\npage = StealthyFetcher.fetch('https:\u002F\u002Fnopecha.com\u002Fdemo\u002Fcloudflare')\ndata = page.css('#padded_content a').getall()\n```\nFull browser automation\n```python\nfrom scrapling.fetchers import DynamicFetcher, DynamicSession\n\nwith DynamicSession(headless=True, disable_resources=False, network_idle=True) as session:  # Keep the browser open until you finish\n    page = session.fetch('https:\u002F\u002Fquotes.toscrape.com\u002F', load_dom=False)\n    data = page.xpath('\u002F\u002Fspan[@class=\"text\"]\u002Ftext()').getall()  # XPath selector if you prefer it\n\n# Or use one-off request style, it opens the browser for this request, then closes it after 
finishing\npage = DynamicFetcher.fetch('https:\u002F\u002Fquotes.toscrape.com\u002F')\ndata = page.css('.quote .text::text').getall()\n```\n\n### Spiders\nBuild full crawlers with concurrent requests, multiple session types, and pause\u002Fresume:\n```python\nfrom scrapling.spiders import Spider, Request, Response\n\nclass QuotesSpider(Spider):\n    name = \"quotes\"\n    start_urls = [\"https:\u002F\u002Fquotes.toscrape.com\u002F\"]\n    concurrent_requests = 10\n    \n    async def parse(self, response: Response):\n        for quote in response.css('.quote'):\n            yield {\n                \"text\": quote.css('.text::text').get(),\n                \"author\": quote.css('.author::text').get(),\n            }\n            \n        next_page = response.css('.next a')\n        if next_page:\n            yield response.follow(next_page[0].attrib['href'])\n\nresult = QuotesSpider().start()\nprint(f\"Scraped {len(result.items)} quotes\")\nresult.items.to_json(\"quotes.json\")\n```\nUse multiple session types in a single spider:\n```python\nfrom scrapling.spiders import Spider, Request, Response\nfrom scrapling.fetchers import FetcherSession, AsyncStealthySession\n\nclass MultiSessionSpider(Spider):\n    name = \"multi\"\n    start_urls = [\"https:\u002F\u002Fexample.com\u002F\"]\n    \n    def configure_sessions(self, manager):\n        manager.add(\"fast\", FetcherSession(impersonate=\"chrome\"))\n        manager.add(\"stealth\", AsyncStealthySession(headless=True), lazy=True)\n    \n    async def parse(self, response: Response):\n        for link in response.css('a::attr(href)').getall():\n            # Route protected pages through the stealth session\n            if \"protected\" in link:\n                yield Request(link, sid=\"stealth\")\n            else:\n                yield Request(link, sid=\"fast\", callback=self.parse)  # explicit callback\n```\nPause and resume long crawls with checkpoints by running the spider like 
this:\n```python\nQuotesSpider(crawldir=\".\u002Fcrawl_data\").start()\n```\nPress Ctrl+C to pause gracefully - progress is saved automatically. Later, when you start the spider again, pass the same `crawldir`, and it will resume from where it stopped.\n\n### Advanced Parsing & Navigation\n```python\nfrom scrapling.fetchers import Fetcher\n\n# Rich element selection and navigation\npage = Fetcher.get('https:\u002F\u002Fquotes.toscrape.com\u002F')\n\n# Get quotes with multiple selection methods\nquotes = page.css('.quote')  # CSS selector\nquotes = page.xpath('\u002F\u002Fdiv[@class=\"quote\"]')  # XPath\nquotes = page.find_all('div', {'class': 'quote'})  # BeautifulSoup-style\n# Same as\nquotes = page.find_all('div', class_='quote')\nquotes = page.find_all(['div'], class_='quote')\nquotes = page.find_all(class_='quote')  # and so on...\n# Find element by text content\nquotes = page.find_by_text('quote', tag='div')\n\n# Advanced navigation\nquote_text = page.css('.quote')[0].css('.text::text').get()\nquote_text = page.css('.quote').css('.text::text').getall()  # Chained selectors\nfirst_quote = page.css('.quote')[0]\nauthor = first_quote.next_sibling.css('.author::text')\nparent_container = first_quote.parent\n\n# Element relationships and similarity\nsimilar_elements = first_quote.find_similar()\nbelow_elements = first_quote.below_elements()\n```\nYou can use the parser right away if you don't want to fetch websites like below:\n```python\nfrom scrapling.parser import Selector\n\npage = Selector(\"\u003Chtml>...\u003C\u002Fhtml>\")\n```\nAnd it works precisely the same way!\n\n### Async Session Management Examples\n```python\nimport asyncio\nfrom scrapling.fetchers import FetcherSession, AsyncStealthySession, AsyncDynamicSession\n\nasync with FetcherSession(http3=True) as session:  # `FetcherSession` is context-aware and can work in both sync\u002Fasync patterns\n    page1 = session.get('https:\u002F\u002Fquotes.toscrape.com\u002F')\n    page2 = 
session.get('https:\u002F\u002Fquotes.toscrape.com\u002F', impersonate='firefox135')\n\n# Async session usage\nasync with AsyncStealthySession(max_pages=2) as session:\n    tasks = []\n    urls = ['https:\u002F\u002Fexample.com\u002Fpage1', 'https:\u002F\u002Fexample.com\u002Fpage2']\n    \n    for url in urls:\n        task = session.fetch(url)\n        tasks.append(task)\n    \n    print(session.get_pool_stats())  # Optional - The status of the browser tabs pool (busy\u002Ffree\u002Ferror)\n    results = await asyncio.gather(*tasks)\n    print(session.get_pool_stats())\n```\n\n## CLI & Interactive Shell\n\nScrapling includes a powerful command-line interface:\n\n[![asciicast](https:\u002F\u002Fasciinema.org\u002Fa\u002F736339.svg)](https:\u002F\u002Fasciinema.org\u002Fa\u002F736339)\n\nLaunch the interactive Web Scraping shell\n```bash\nscrapling shell\n```\nExtract pages to a file directly without programming (Extracts the content inside the `body` tag by default). If the output file ends with `.txt`, then the text content of the target will be extracted. If it ends in `.md`, it will be a Markdown representation of the HTML content; if it ends in `.html`, it will be the HTML content itself.\n```bash\nscrapling extract get 'https:\u002F\u002Fexample.com' content.md\nscrapling extract get 'https:\u002F\u002Fexample.com' content.txt --css-selector '#fromSkipToProducts' --impersonate 'chrome'  # All elements matching the CSS selector '#fromSkipToProducts'\nscrapling extract fetch 'https:\u002F\u002Fexample.com' content.md --css-selector '#fromSkipToProducts' --no-headless\nscrapling extract stealthy-fetch 'https:\u002F\u002Fnopecha.com\u002Fdemo\u002Fcloudflare' captchas.html --css-selector '#padded_content a' --solve-cloudflare\n```\n\n> [!NOTE]\n> There are many additional features, but we want to keep this page concise, including the MCP server and the interactive Web Scraping Shell. 
Check out the full documentation [here](https:\u002F\u002Fscrapling.readthedocs.io\u002Fen\u002Flatest\u002F)\n\n## Performance Benchmarks\n\nScrapling isn't just powerful-it's also blazing fast. The following benchmarks compare Scrapling's parser with the latest versions of other popular libraries.\n\n### Text Extraction Speed Test (5000 nested elements)\n\n| # |      Library      | Time (ms) | vs Scrapling | \n|---|:-----------------:|:---------:|:------------:|\n| 1 |     Scrapling     |   2.02    |     1.0x     |\n| 2 |   Parsel\u002FScrapy   |   2.04    |     1.01     |\n| 3 |     Raw Lxml      |   2.54    |    1.257     |\n| 4 |      PyQuery      |   24.17   |     ~12x     |\n| 5 |    Selectolax     |   82.63   |     ~41x     |\n| 6 |  MechanicalSoup   |  1549.71  |   ~767.1x    |\n| 7 |   BS4 with Lxml   |  1584.31  |   ~784.3x    |\n| 8 | BS4 with html5lib |  3391.91  |   ~1679.1x   |\n\n\n### Element Similarity & Text Search Performance\n\nScrapling's adaptive element finding capabilities significantly outperform alternatives:\n\n| Library     | Time (ms) | vs Scrapling |\n|-------------|:---------:|:------------:|\n| Scrapling   |   2.39    |     1.0x     |\n| AutoScraper |   12.45   |    5.209x    |\n\n\n> All benchmarks represent averages of 100+ runs. See [benchmarks.py](https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fbenchmarks.py) for methodology.\n\n## Installation\n\nScrapling requires Python 3.10 or higher:\n\n```bash\npip install scrapling\n```\n\nThis installation only includes the parser engine and its dependencies, without any fetchers or commandline dependencies.\n\n### Optional Dependencies\n\n1. 
If you are going to use any of the extra features below, the fetchers, or their classes, you will need to install fetchers' dependencies and their browser dependencies as follows:\n    ```bash\n    pip install \"scrapling[fetchers]\"\n    \n    scrapling install           # normal install\n    scrapling install  --force  # force reinstall\n    ```\n\n    This downloads all browsers, along with their system dependencies and fingerprint manipulation dependencies.\n\n    Or you can install them from the code instead of running a command like this:\n    ```python\n    from scrapling.cli import install\n    \n    install([], standalone_mode=False)          # normal install\n    install([\"--force\"], standalone_mode=False) # force reinstall\n    ```\n\n2. Extra features:\n   - Install the MCP server feature:\n       ```bash\n       pip install \"scrapling[ai]\"\n       ```\n   - Install shell features (Web Scraping shell and the `extract` command): \n       ```bash\n       pip install \"scrapling[shell]\"\n       ```\n   - Install everything: \n       ```bash\n       pip install \"scrapling[all]\"\n       ```\n   Remember that you need to install the browser dependencies with `scrapling install` after any of these extras (if you didn't already)\n\n### Docker\nYou can also install a Docker image with all extras and browsers with the following command from DockerHub:\n```bash\ndocker pull pyd4vinci\u002Fscrapling\n```\nOr download it from the GitHub registry:\n```bash\ndocker pull ghcr.io\u002Fd4vinci\u002Fscrapling:latest\n```\nThis image is automatically built and pushed using GitHub Actions and the repository's main branch.\n\n## Contributing\n\nWe welcome contributions! Please read our [contributing guidelines](https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002FCONTRIBUTING.md) before getting started.\n\n## Disclaimer\n\n> [!CAUTION]\n> This library is provided for educational and research purposes only. 
By using this library, you agree to comply with local and international data scraping and privacy laws. The authors and contributors are not responsible for any misuse of this software. Always respect the terms of service of websites and robots.txt files.\n\n## 🎓 Citations\nIf you have used our library for research purposes please quote us with the following reference:\n```text\n  @misc{scrapling,\n    author = {Karim Shoair},\n    title = {Scrapling},\n    year = {2024},\n    url = {https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling},\n    note = {An adaptive Web Scraping framework that handles everything from a single request to a full-scale crawl!}\n  }\n```\n\n## License\n\nThis work is licensed under the BSD-3-Clause License.\n\n## Acknowledgments\n\nThis project includes code adapted from:\n- Parsel (BSD License)-Used for [translator](https:\u002F\u002Fgithub.com\u002FD4Vinci\u002FScrapling\u002Fblob\u002Fmain\u002Fscrapling\u002Fcore\u002Ftranslator.py) submodule\n\n---\n\u003Cdiv align=\"center\">\u003Csmall>Designed & crafted with ❤️ by Karim Shoair.\u003C\u002Fsmall>\u003C\u002Fdiv>\u003Cbr>\n","Scrapling 是一个自适应的网页抓取框架，能够处理从单个请求到大规模爬取的所有任务。它基于 Python 开发，集成了 Playwright 和 XPath 等技术，支持隐身模式和自动化操作，可以有效应对现代网站的各种反爬虫机制。此外，Scrapling 还引入了 AI 技术来增强数据提取能力。该工具非常适合需要进行高效、灵活且可扩展的数据抓取的应用场景，如市场调研、竞品分析或内容聚合等。","2026-06-11 02:30:25","trending"]