Python SDK
The official Python SDK for Scrpy. Fully typed, async-ready, and Pythonic.
Installation
```bash
pip install scrpy
```

Requires Python 3.8+. For async support, Python 3.10+ is recommended.
Quick Start
```python
from scrpy import Scrpy

# Initialize client
client = Scrpy(api_key="sk_live_xxxxx")

# Simple scrape
result = client.scrape(
    url="https://example.com",
    selectors={
        "title": "h1",
        "description": "meta[name=description]::attr(content)"
    }
)

print(result.data)
# {'title': 'Example Domain', 'description': '...'}
```
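The examples in this guide hardcode the API key for readability. In practice you will usually load it from the environment; the SCRPY_API_KEY variable name below is an illustrative choice for this sketch, not an SDK convention.

```python
import os

from scrpy import Scrpy

# Read the key from the environment instead of hardcoding it.
# SCRPY_API_KEY is an arbitrary name chosen for this example.
api_key = os.environ.get("SCRPY_API_KEY")
if not api_key:
    raise RuntimeError("SCRPY_API_KEY is not set")

client = Scrpy(api_key=api_key)
```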
Async Usage

```python
import asyncio
from scrpy import AsyncScrpy

async def main():
    client = AsyncScrpy(api_key="sk_live_xxxxx")

    # Scrape multiple URLs concurrently
    urls = [
        "https://example.com/page1",
        "https://example.com/page2",
        "https://example.com/page3",
    ]

    tasks = [
        client.scrape(url=url, selectors={"title": "h1"})
        for url in urls
    ]
    results = await asyncio.gather(*tasks)

    for result in results:
        print(result.data["title"])

asyncio.run(main())
```
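asyncio.gather launches every request at once. For larger URL lists you may want to cap concurrency on the client side; the sketch below does this with a semaphore, reusing the AsyncScrpy.scrape call from the example above (the limit of 5 is an arbitrary choice).

```python
import asyncio

from scrpy import AsyncScrpy

async def scrape_all(urls, max_concurrency=5):
    client = AsyncScrpy(api_key="sk_live_xxxxx")
    semaphore = asyncio.Semaphore(max_concurrency)

    async def scrape_one(url):
        # The semaphore keeps at most max_concurrency requests in flight.
        async with semaphore:
            return await client.scrape(url=url, selectors={"title": "h1"})

    return await asyncio.gather(*(scrape_one(url) for url in urls))

urls = [f"https://example.com/page{i}" for i in range(1, 21)]
results = asyncio.run(scrape_all(urls))
```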
Advanced Options

```python
from scrpy import Scrpy

client = Scrpy(api_key="sk_live_xxxxx")

result = client.scrape(
    url="https://protected-site.com/products",
    selectors={
        "products": ".product-card::all",
        "prices": ".price | parseNumber::all"
    },
    render=True,           # Enable JS rendering
    anti_bot=True,         # Enable anti-bot bypass
    proxy="us",            # Use US proxy
    wait_for=".products",  # Wait for element
    timeout=30000          # 30 second timeout
)

print(f"Found {len(result.data['products'])} products")
```
Bulk Scraping with Jobs

```python
from scrpy import Scrpy

client = Scrpy(api_key="sk_live_xxxxx")

# Create a job for multiple URLs
job = client.jobs.create(
    name="Product Catalog",
    urls=[
        "https://store.com/product/1",
        "https://store.com/product/2",
        "https://store.com/product/3",
    ],
    selectors={
        "name": ".product-name",
        "price": ".price | parseNumber"
    }
)
print(f"Job created: {job.id}")

# Wait for completion
job = client.jobs.wait(job.id)

# Get results
for result in job.results:
    print(f"{result.url}: {result.data}")
```
Error Handling

```python
from scrpy import Scrpy
from scrpy.exceptions import (
    ScrpyError,
    RateLimitError,
    AuthenticationError,
    BlockedError
)

client = Scrpy(api_key="sk_live_xxxxx")

try:
    result = client.scrape(url="https://example.com")
except RateLimitError as e:
    print(f"Rate limited. Retry after {e.retry_after} seconds")
except BlockedError:
    print("Request blocked. Try enabling anti_bot=True")
except AuthenticationError:
    print("Invalid API key")
except ScrpyError as e:
    print(f"Scraping failed: {e.message}")
```
Type Hints

The SDK is fully typed for excellent IDE support:

```python
from scrpy import Scrpy
from scrpy.types import ScrapeResult, ScrapeOptions

client = Scrpy(api_key="sk_live_xxxxx")

options: ScrapeOptions = {
    "url": "https://example.com",
    "selectors": {"title": "h1"},
    "render": True
}

result: ScrapeResult = client.scrape(**options)
# IDE knows result.data is Dict[str, Any]
# IDE knows result.metadata.duration is int
```