Node.js Integration
Integrate ScrapeHub into your Node.js applications
Installation
Terminal
# Using npm
npm install @scrapehub/node
# Using yarn
yarn add @scrapehub/node
# Using pnpm
pnpm add @scrapehub/node
Quick Start
basic-usage.js
const { ScrapeHubClient } = require('@scrapehub/node');
// Initialize client
const client = new ScrapeHubClient({
apiKey: 'sk_live_xxxx_449x'
});
// Simple scrape
async function main() {
const result = await client.scrape({
url: 'https://example.com/products',
engine: 'neural-x1'
});
console.log(`Extracted ${result.data.length} items`);
console.log(result.data);
}
main();

TypeScript Support
typescript-usage.ts
import { ScrapeHubClient, ScrapeConfig, ScrapeResult } from '@scrapehub/node';
interface Product {
name: string;
price: number;
rating: number;
url: string;
}
const client = new ScrapeHubClient({
apiKey: process.env.SCRAPEHUB_API_KEY!
});
/**
 * Scrapes the example product listing as JSON with the 'neural-x1' engine.
 *
 * @returns The `data` array of the scrape result, typed as products.
 */
async function scrapeProducts(): Promise<Product[]> {
  const request: ScrapeConfig = {
    url: 'https://example.com/products',
    engine: 'neural-x1',
    format: 'json'
  };
  const { data }: ScrapeResult<Product> = await client.scrape(request);
  return data;
}
scrapeProducts().then(products => {
console.log(`Found ${products.length} products`);
products.forEach(product => {
console.log(`${product.name}: $${product.price}`);
});
});

Advanced Configuration
advanced-config.js
const { ScrapeHubClient } = require('@scrapehub/node');
const client = new ScrapeHubClient({
apiKey: 'sk_live_xxxx_449x',
timeout: 300000, // 5 minutes
retries: 3,
baseUrl: 'https://api.scrapehub.io/v4'
});
async function advancedScrape() {
const result = await client.scrape({
url: 'https://example.com/products',
engine: 'neural-x1',
format: 'json',
// Pagination
pagination: {
enabled: true,
maxPages: 10,
selector: 'a.next-page'
},
// Custom headers
headers: {
'User-Agent': 'Mozilla/5.0...',
'Accept-Language': 'en-US,en;q=0.9'
},
// JavaScript rendering
renderJs: true,
waitForSelector: '.product-list',
// Proxy settings
proxy: {
enabled: true,
region: 'us-east',
residential: true
},
// Rate limiting
rateLimit: {
requestsPerSecond: 2,
delayBetweenPages: 1000
}
});
return result;
}

Async/Promises
async-promises.js
const { ScrapeHubClient } = require('@scrapehub/node');
const client = new ScrapeHubClient({ apiKey: 'sk_live_xxxx_449x' });
// Multiple URLs with Promise.all
async function scrapeMultipleUrls() {
const urls = [
'https://example.com/category/1',
'https://example.com/category/2',
'https://example.com/category/3'
];
const results = await Promise.all(
urls.map(url => client.scrape({ url, engine: 'neural-x1' }))
);
const allItems = results.flatMap(result => result.data);
console.log(`Total items: ${allItems.length}`);
return allItems;
}
// With Promise.allSettled for error handling
async function scrapeWithErrorHandling() {
const urls = ['url1', 'url2', 'url3'];
const results = await Promise.allSettled(
urls.map(url => client.scrape({ url, engine: 'neural-x1' }))
);
results.forEach((result, index) => {
if (result.status === 'fulfilled') {
console.log(`URL ${index}: ${result.value.data.length} items`);
} else {
console.error(`URL ${index} failed: ${result.reason}`);
}
});
}

Job Management
job-management.js
const { ScrapeHubClient } = require('@scrapehub/node');
const client = new ScrapeHubClient({ apiKey: 'sk_live_xxxx_449x' });
// Create and monitor job
async function createAndMonitorJob() {
// Create job
const job = await client.createJob({
url: 'https://example.com/large-dataset',
engine: 'neural-x1'
});
console.log(`Job created: ${job.id}`);
// Poll job status
const interval = setInterval(async () => {
const status = await job.getStatus();
console.log(`Progress: ${status.progress}%`);
if (status.isComplete) {
clearInterval(interval);
if (status.isSuccessful) {
const results = await job.getResults();
console.log(`Extracted ${results.length} items`);
} else {
console.error(`Job failed: ${status.errorMessage}`);
}
}
}, 5000);
}
// List jobs
async function listJobs() {
const jobs = await client.listJobs({
limit: 10,
status: 'completed'
});
jobs.forEach(job => {
console.log(`${job.id}: ${job.status} - ${job.createdAt}`);
});
}

Express.js Integration
express-integration.js
const express = require('express');
const { ScrapeHubClient } = require('@scrapehub/node');
const app = express();
const client = new ScrapeHubClient({ apiKey: process.env.SCRAPEHUB_API_KEY });
app.use(express.json());
// Scrape endpoint
/**
 * POST /api/scrape — scrapes the URL supplied in the JSON request body.
 *
 * Body: `{ url, engine }`; `engine` defaults to 'neural-x1' when omitted.
 * Responds 400 when `url` is missing or not a non-empty string, and 500
 * when the scrape call itself fails.
 */
app.post('/api/scrape', async (req, res) => {
  // The body comes from the client and may be absent if no JSON payload was
  // parsed, so fall back to an empty object before destructuring.
  const { url, engine = 'neural-x1' } = req.body ?? {};

  // Reject bad input as a client error instead of letting it surface as a 500.
  if (typeof url !== 'string' || url.trim() === '') {
    res.status(400).json({
      success: false,
      error: 'Request body must include a non-empty "url" string'
    });
    return;
  }

  try {
    const result = await client.scrape({ url, engine });
    res.json({
      success: true,
      itemCount: result.data.length,
      data: result.data
    });
  } catch (error) {
    res.status(500).json({
      success: false,
      // Thrown values are not guaranteed to be Error instances
      error: error instanceof Error ? error.message : String(error)
    });
  }
});
// Webhook endpoint
/**
 * POST /api/webhook — logs 'job.completed' events and acknowledges every
 * request with `{ status: 'received' }`.
 */
app.post('/api/webhook', (req, res) => {
  const payload = req.body;
  const event = payload.event;
  const jobId = payload.jobId;

  const isCompletion = event === 'job.completed';
  if (isCompletion) {
    console.log(`Job ${jobId} completed`);
    // Process results
  }

  res.json({ status: 'received' });
});
app.listen(3000, () => {
console.log('Server running on port 3000');
});

Error Handling
error-handling.js
const {
ScrapeHubClient,
AuthenticationError,
RateLimitError,
InvalidRequestError,
ScraperError
} = require('@scrapehub/node');
const client = new ScrapeHubClient({ apiKey: 'sk_live_xxxx_449x' });
async function scrapeWithErrorHandling() {
try {
const result = await client.scrape({
url: 'https://example.com'
});
return result.data;
} catch (error) {
if (error instanceof AuthenticationError) {
console.error('Authentication failed:', error.message);
// Check your API key
} else if (error instanceof RateLimitError) {
console.error('Rate limit exceeded:', error.message);
console.log(`Retry after: ${error.retryAfter} seconds`);
// Implement exponential backoff
} else if (error instanceof InvalidRequestError) {
console.error('Invalid request:', error.message);
console.log('Error details:', error.details);
// Fix request parameters
} else if (error instanceof ScraperError) {
console.error('Scraper failed:', error.message);
console.log('Target URL:', error.url);
console.log('Error code:', error.code);
// Handle scraper-specific errors
} else {
console.error('Unexpected error:', error);
}
throw error;
}
}

API Reference
For complete API documentation, visit our Node.js SDK reference:
View Node.js SDK Docs →