Node.js SDK

The official Node.js SDK for Scrpy. TypeScript-first with full async/await support.

Installation

bash
# Install with your package manager of choice:
npm install scrpy
yarn add scrpy
pnpm add scrpy

Requires Node.js 18+ for native fetch support.

Quick Start

javascript
import Scrpy from 'scrpy';

const client = new Scrpy('sk_live_xxxxx');

const result = await client.scrape({
  url: 'https://example.com',
  selectors: {
    title: 'h1',
    description: 'meta[name=description]::attr(content)'
  }
});

console.log(result.data);
// { title: 'Example Domain', description: '...' }
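
In real projects you will typically load the API key from an environment variable rather than hardcoding it. A minimal sketch (SCRPY_API_KEY is simply the variable name used in the Express example below):

typescript
import Scrpy from 'scrpy';

const apiKey = process.env.SCRPY_API_KEY;
if (!apiKey) {
  throw new Error('SCRPY_API_KEY is not set');
}

const client = new Scrpy(apiKey);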

TypeScript Support

Full TypeScript support with generic types for your data:

typescript
import Scrpy, { ScrapeResult } from 'scrpy';

interface ProductData {
  name: string;
  price: number;
  inStock: boolean;
}

const client = new Scrpy('sk_live_xxxxx');

const result = await client.scrape<ProductData>({
  url: 'https://store.com/product/123',
  selectors: {
    name: '.product-name',
    price: '.price | parseNumber',
    inStock: '.stock | contains:In Stock'
  }
});

// TypeScript knows result.data is ProductData
console.log(result.data.name);   // string
console.log(result.data.price);  // number
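
The ScrapeResult type imported above can also annotate your own helpers. A sketch, assuming ScrapeResult<T> is the generic return type of scrape<T>() (fetchProduct is illustrative, not part of the SDK):

typescript
// Hypothetical helper built on the client and ProductData defined above
async function fetchProduct(url: string): Promise<ScrapeResult<ProductData>> {
  return client.scrape<ProductData>({
    url,
    selectors: {
      name: '.product-name',
      price: '.price | parseNumber',
      inStock: '.stock | contains:In Stock'
    }
  });
}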

Advanced Options

javascript
const result = await client.scrape({
  url: 'https://spa-website.com/products',
  selectors: {
    products: '.product-card::all',
    prices: '.price | parseNumber::all'
  },
  render: true,           // Enable JS rendering
  antiBot: true,          // Enable anti-bot bypass
  proxy: 'us',            // Use US proxy
  waitFor: '.products',   // Wait for element
  timeout: 30000          // 30 second timeout
});

console.log(`Found ${result.data.products.length} products`);
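
Since ::all yields one entry per match, the same call can be typed with the generic form shown earlier (the ProductListing interface below is illustrative, not part of the SDK):

typescript
// Assumed shape of the ::all results above
interface ProductListing {
  products: string[];  // one entry per .product-card match
  prices: number[];    // one parsed number per .price match
}

const typed = await client.scrape<ProductListing>({
  url: 'https://spa-website.com/products',
  selectors: {
    products: '.product-card::all',
    prices: '.price | parseNumber::all'
  },
  render: true
});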

Concurrent Scraping

javascript
import Scrpy from 'scrpy';

const client = new Scrpy('sk_live_xxxxx');

const urls = [
  'https://example.com/page1',
  'https://example.com/page2',
  'https://example.com/page3',
];

// Scrape all URLs concurrently
const results = await Promise.all(
  urls.map(url => 
    client.scrape({
      url,
      selectors: { title: 'h1' }
    })
  )
);

results.forEach(result => {
  console.log(result.data.title);
});
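
Note that Promise.all rejects as soon as any single scrape fails. If you want a result for every URL even when some pages error, Promise.allSettled is a drop-in alternative:

typescript
const settled = await Promise.allSettled(
  urls.map(url =>
    client.scrape({
      url,
      selectors: { title: 'h1' }
    })
  )
);

settled.forEach((outcome, i) => {
  if (outcome.status === 'fulfilled') {
    console.log(`${urls[i]}: ${outcome.value.data.title}`);
  } else {
    console.error(`${urls[i]} failed:`, outcome.reason);
  }
});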

Bulk Scraping with Jobs

javascript
const job = await client.jobs.create({
  name: 'Product Catalog',
  urls: [
    'https://store.com/product/1',
    'https://store.com/product/2',
    'https://store.com/product/3',
  ],
  selectors: {
    name: '.product-name',
    price: '.price | parseNumber'
  }
});

console.log(`Job created: ${job.id}`);

// Wait for completion
const completedJob = await client.jobs.wait(job.id);

// Process results
for (const result of completedJob.results) {
  console.log(`${result.url}: ${JSON.stringify(result.data)}`);
}
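
Because each result is a plain { url, data } object, persisting a completed job is straightforward. A minimal sketch that writes the results above to a JSON Lines file using Node's built-in fs module:

typescript
import { writeFileSync } from 'node:fs';

// One JSON object per line, continuing from completedJob above
const lines = completedJob.results
  .map(result => JSON.stringify({ url: result.url, data: result.data }))
  .join('\n');

writeFileSync('catalog.jsonl', lines + '\n');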

Error Handling

javascript
import Scrpy, {
  ScrpyError,
  RateLimitError,
  AuthenticationError,
  BlockedError
} from 'scrpy';

const client = new Scrpy('sk_live_xxxxx');

try {
  const result = await client.scrape({ url: 'https://example.com' });
} catch (error) {
  if (error instanceof RateLimitError) {
    console.log(`Rate limited. Retry after ${error.retryAfter}s`);
  } else if (error instanceof BlockedError) {
    console.log('Blocked. Try enabling antiBot: true');
  } else if (error instanceof AuthenticationError) {
    console.log('Invalid API key');
  } else if (error instanceof ScrpyError) {
    console.log(`Scraping failed: ${error.message}`);
  }
}
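
Because RateLimitError exposes retryAfter (in seconds, per the example above), a small retry wrapper is easy to build. scrapeWithRetry below is an illustrative sketch, not part of the SDK:

typescript
async function scrapeWithRetry(
  params: Parameters<typeof client.scrape>[0],
  maxAttempts = 3
) {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await client.scrape(params);
    } catch (error) {
      if (error instanceof RateLimitError && attempt < maxAttempts) {
        // Back off for the server-suggested interval before retrying
        await new Promise(resolve => setTimeout(resolve, error.retryAfter * 1000));
        continue;
      }
      throw error;
    }
  }
}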

Express.js Example

typescript
import express from 'express';
import Scrpy from 'scrpy';

const app = express();
const client = new Scrpy(process.env.SCRPY_API_KEY!);

app.get('/api/scrape', async (req, res) => {
  const { url } = req.query;
  
  if (!url || typeof url !== 'string') {
    return res.status(400).json({ error: 'URL required' });
  }

  try {
    const result = await client.scrape({
      url,
      selectors: {
        title: 'h1',
        content: 'article'
      }
    });
    
    res.json(result.data);
  } catch (error) {
    res.status(500).json({ error: 'Scraping failed' });
  }
});

app.listen(3000);
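
With the server running, you can exercise the endpoint from any Node.js 18+ script (localhost:3000 and the target URL here are placeholders):

typescript
const target = encodeURIComponent('https://example.com');
const res = await fetch(`http://localhost:3000/api/scrape?url=${target}`);
console.log(await res.json());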

Resources