Python SDK

The official Python SDK for Scrpy. Fully typed, async-ready, and Pythonic.

Installation

bash
pip install scrpy

Requires Python 3.8 or later. Python 3.10 or later is recommended for async support.
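
To confirm the install, you can print the installed package version using only the standard library (the distribution name scrpy matches the pip command above; the version shown is just an example):

python
from importlib.metadata import version

# Prints the installed scrpy version, e.g. "1.2.3"
print(version("scrpy"))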

Quick Start

python
from scrpy import Scrpy

# Initialize client
client = Scrpy(api_key="sk_live_xxxxx")

# Simple scrape
result = client.scrape(
    url="https://example.com",
    selectors={
        "title": "h1",
        "description": "meta[name=description]::attr(content)"
    }
    }
)

print(result.data)
# {'title': 'Example Domain', 'description': '...'}

Async Usage

python
import asyncio
from scrpy import AsyncScrpy

async def main():
    client = AsyncScrpy(api_key="sk_live_xxxxx")
    
    # Scrape multiple URLs concurrently
    urls = [
        "https://example.com/page1",
        "https://example.com/page2",
        "https://example.com/page3",
    ]
    
    tasks = [
        client.scrape(url=url, selectors={"title": "h1"})
        for url in urls
    ]
    
    results = await asyncio.gather(*tasks)
    
    for result in results:
        print(result.data["title"])

asyncio.run(main())
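
asyncio.gather launches every request at once. To cap how many scrapes run concurrently, you can wrap the same scrape call in an asyncio.Semaphore — a minimal sketch using only the calls shown above, not a built-in SDK feature:

python
import asyncio
from scrpy import AsyncScrpy

async def scrape_limited(urls, max_concurrency=5):
    client = AsyncScrpy(api_key="sk_live_xxxxx")
    sem = asyncio.Semaphore(max_concurrency)

    async def scrape_one(url):
        # Only max_concurrency requests are in flight at any time
        async with sem:
            return await client.scrape(url=url, selectors={"title": "h1"})

    return await asyncio.gather(*(scrape_one(u) for u in urls))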

Advanced Options

python
from scrpy import Scrpy

client = Scrpy(api_key="sk_live_xxxxx")

result = client.scrape(
    url="https://protected-site.com/products",
    selectors={
        "products": ".product-card::all",
        "prices": ".price | parseNumber::all"
    },
    render=True,           # Enable JS rendering
    anti_bot=True,         # Enable anti-bot bypass
    proxy="us",            # Use US proxy
    wait_for=".products",  # Wait for element
    timeout=30000          # 30 second timeout
)

print(f"Found {len(result.data['products'])} products")
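
Because both selectors use ::all, result.data contains parallel lists; pairing them up is plain Python (this assumes the two lists align one-to-one):

python
# Assumes result.data["products"] and result.data["prices"] have matching order
for product, price in zip(result.data["products"], result.data["prices"]):
    print(product, price)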

Bulk Scraping with Jobs

python
from scrpy import Scrpy

client = Scrpy(api_key="sk_live_xxxxx")

# Create a job for multiple URLs
job = client.jobs.create(
    name="Product Catalog",
    urls=[
        "https://store.com/product/1",
        "https://store.com/product/2",
        "https://store.com/product/3",
    ],
    selectors={
        "name": ".product-name",
        "price": ".price | parseNumber"
    }
)

print(f"Job created: {job.id}")

# Wait for completion
job = client.jobs.wait(job.id)

# Get results
for result in job.results:
    print(f"{result.url}: {result.data}")
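
Each job result exposes url and data, so exporting a run to CSV needs nothing beyond the standard library — a sketch assuming the data keys match the name and price selectors above:

python
import csv

with open("products.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["url", "name", "price"])
    writer.writeheader()
    for result in job.results:
        # result.data keys mirror the selectors passed to jobs.create
        writer.writerow({"url": result.url, **result.data})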

Error Handling

python
from scrpy import Scrpy
from scrpy.exceptions import (
    ScrpyError,
    RateLimitError,
    AuthenticationError,
    BlockedError
)

client = Scrpy(api_key="sk_live_xxxxx")

try:
    result = client.scrape(url="https://example.com")
except RateLimitError as e:
    print(f"Rate limited. Retry after {e.retry_after} seconds")
except BlockedError:
    print("Request blocked. Try enabling anti_bot=True")
except AuthenticationError:
    print("Invalid API key")
except ScrpyError as e:
    print(f"Scraping failed: {e.message}")
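
RateLimitError exposes retry_after, which is enough to build a small retry wrapper for transient limits — a sketch on top of the client above, not a built-in SDK helper:

python
import time
from scrpy import Scrpy
from scrpy.exceptions import RateLimitError

client = Scrpy(api_key="sk_live_xxxxx")

def scrape_with_retry(url, retries=3, **options):
    for attempt in range(retries):
        try:
            return client.scrape(url=url, **options)
        except RateLimitError as e:
            if attempt == retries - 1:
                raise
            # Sleep for the server-suggested interval before retrying
            time.sleep(e.retry_after)

result = scrape_with_retry("https://example.com", selectors={"title": "h1"})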

Type Hints

The SDK is fully typed for excellent IDE support:

python
from scrpy import Scrpy
from scrpy.types import ScrapeResult, ScrapeOptions

client = Scrpy(api_key="sk_live_xxxxx")

options: ScrapeOptions = {
    "url": "https://example.com",
    "selectors": {"title": "h1"},
    "render": True
}

result: ScrapeResult = client.scrape(**options)

# IDE knows result.data is Dict[str, Any]
# IDE knows result.metadata.duration is int

Resources