Deploy Reader to serverless platforms.
Serverless deployment requires special consideration because:
- Chrome can't run in standard serverless environments
- Cold starts are slow for browser automation
- Memory and timeout limits apply
Solution: Use remote browser services or container-based serverless.
Note: For serverless environments, use the direct scrape() function with connectionToCore instead of ReaderClient, since you're connecting to a remote browser service rather than managing a local HeroCore instance.
Connect to a hosted Chrome instance instead of running locally.
// Connect to a hosted Chrome instance via its WebSocket endpoint
// instead of launching a local browser.
import { scrape } from "@vakra-dev/reader";
const result = await scrape({
urls: ["https://example.com"],
// Remote browser endpoint — replace YOUR_TOKEN with your service API token
connectionToCore: "wss://chrome.browserless.io?token=YOUR_TOKEN",
});
- Browserless - Popular, good Hero support
- Bright Data - Built-in proxy rotation
- Apify - Browser automation platform
Lambda supports containers, which can include Chrome.
# AWS Lambda Node.js 20 base image (container-image deployment)
FROM public.ecr.aws/lambda/nodejs:20
# Install Chrome dependencies
RUN yum install -y \
chromium \
nss \
freetype \
freetype-devel \
fontconfig \
pango \
--skip-broken
# Point the browser launcher at the system chromium binary
# NOTE(review): presumably read by Hero/Reader to locate Chrome — confirm
ENV CHROME_PATH=/usr/bin/chromium-browser
ENV FONTCONFIG_PATH=/etc/fonts
# Copy manifests first so the npm ci layer is cached across code changes
COPY package*.json ./
RUN npm ci --only=production
COPY . .
CMD ["dist/handler.handler"]
// handler.ts
import { scrape } from "@vakra-dev/reader";
import { APIGatewayProxyHandler } from "aws-lambda";
// Lambda entry point for the container deployment.
// Expects a JSON body of { urls: string[], formats?: string[] }.
export const handler: APIGatewayProxyHandler = async (event) => {
  // Parse defensively: a malformed body should yield a 400, not an unhandled throw
  // (the original parsed outside try/catch, so bad JSON crashed the invocation).
  let body: { urls?: unknown; formats?: string[] };
  try {
    body = JSON.parse(event.body || "{}");
  } catch {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: "Request body must be valid JSON" }),
    };
  }

  // Validate input up front instead of failing inside scrape().
  if (!Array.isArray(body.urls) || body.urls.length === 0) {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: "Request body must include a non-empty 'urls' array" }),
    };
  }

  try {
    const result = await scrape({
      urls: body.urls,
      formats: body.formats || ["markdown"],
      showChrome: false, // always headless inside the Lambda container
    });
    return {
      statusCode: 200,
      body: JSON.stringify(result),
    };
  } catch (error: unknown) {
    // Narrow before reading .message — catch variables are not guaranteed Errors.
    const message = error instanceof Error ? error.message : String(error);
    return {
      statusCode: 500,
      body: JSON.stringify({ error: message }),
    };
  }
};
# Build and push to ECR
# Authenticate the local Docker client with your private ECR registry
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com
# Build, tag, and push the container image
docker build -t reader-lambda .
docker tag reader-lambda:latest YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/reader-lambda:latest
docker push YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/reader-lambda:latest
# Create Lambda function
# 2 GB memory and a 60 s timeout: browser automation needs more headroom than
# the Lambda defaults provide
aws lambda create-function \
--function-name reader \
--package-type Image \
--code ImageUri=YOUR_ACCOUNT.dkr.ecr.us-east-1.amazonaws.com/reader-lambda:latest \
--role arn:aws:iam::YOUR_ACCOUNT:role/lambda-execution-role \
--memory-size 2048 \
--timeout 60
Use Browserless or similar with standard Lambda:
// handler.ts
import { scrape } from "@vakra-dev/reader";
// Standard (non-container) Lambda: delegate browser work to a remote Chrome
// service via connectionToCore instead of bundling Chrome in the package.
export const handler = async (event: { body?: string | null }) => {
  // Parse defensively so malformed input is a 400, not an unhandled throw.
  let body: { urls?: unknown; formats?: string[] };
  try {
    body = JSON.parse(event.body || "{}");
  } catch {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: "Request body must be valid JSON" }),
    };
  }

  if (!Array.isArray(body.urls) || body.urls.length === 0) {
    return {
      statusCode: 400,
      body: JSON.stringify({ error: "Request body must include a non-empty 'urls' array" }),
    };
  }

  try {
    const result = await scrape({
      urls: body.urls,
      formats: body.formats || ["markdown"],
      // Remote browser endpoint, e.g. wss://chrome.browserless.io?token=...
      connectionToCore: process.env.BROWSERLESS_URL,
    });
    return {
      statusCode: 200,
      body: JSON.stringify(result),
    };
  } catch (error: unknown) {
    // Surface scrape failures as a structured 500 instead of an unhandled
    // invocation error (the original had no error handling here).
    const message = error instanceof Error ? error.message : String(error);
    return {
      statusCode: 500,
      body: JSON.stringify({ error: message }),
    };
  }
};
// api/scrape.ts
import { scrape } from "@vakra-dev/reader";
import type { VercelRequest, VercelResponse } from "@vercel/node";
// Vercel serverless function: POST { urls, formats? } → scrape result as JSON.
export default async function handler(req: VercelRequest, res: VercelResponse) {
  if (req.method !== "POST") {
    return res.status(405).json({ error: "Method not allowed" });
  }

  const { urls, formats = ["markdown"] } = req.body;

  try {
    const result = await scrape({
      urls,
      formats,
      // Remote browser service — Chrome cannot run inside Vercel functions.
      connectionToCore: process.env.BROWSERLESS_URL,
    });
    res.json(result);
  } catch (error: unknown) {
    // Narrow before reading .message — catch variables are not guaranteed Errors.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: message });
  }
}

export const config = {
  maxDuration: 60, // seconds; must also be within your plan's limit
};
{
"functions": {
"api/scrape.ts": {
"memory": 1024,
"maxDuration": 60
}
}
}
vercel env add BROWSERLESS_URL
Workers don't support Node.js natively, but you can:
// src/index.ts
// Workers cannot run Node.js or Chrome; proxy the request to an external
// scraping service (e.g. a Cloud Run or Lambda deployment of Reader).
export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    if (request.method !== "POST") {
      return new Response("Method not allowed", { status: 405 });
    }

    // request.json() rejects on invalid JSON — catch it so the Worker
    // returns a 400 instead of an uncaught exception.
    let payload: { urls?: unknown };
    try {
      payload = await request.json();
    } catch {
      return new Response("Request body must be valid JSON", { status: 400 });
    }

    const { urls } = payload;
    if (!Array.isArray(urls) || urls.length === 0) {
      return new Response("Body must include a non-empty 'urls' array", { status: 400 });
    }

    // Call external scraping service
    const response = await fetch(env.SCRAPER_API_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ urls }),
    });
    return response;
  },
};
Cloudflare offers Browser Rendering API in beta:
// Cloudflare Browser Rendering (beta): drive a real browser from a Worker.
// NOTE(review): `puppeteer` is not imported in this snippet — presumably
// `import puppeteer from "@cloudflare/puppeteer"` plus a browser binding in
// wrangler config are required; confirm against Cloudflare's docs.
export default {
async fetch(request: Request, env: Env): Promise<Response> {
// env.BROWSER — assumed to be the Workers browser binding; TODO confirm
const browser = await puppeteer.launch(env.BROWSER);
const page = await browser.newPage();
await page.goto("https://example.com");
// Serialized HTML of the rendered page
const html = await page.content();
await browser.close();
return new Response(html);
},
};
# cloudbuild.yaml
steps:
- name: "gcr.io/cloud-builders/docker"
args: ["build", "-t", "gcr.io/$PROJECT_ID/reader", "."]
- name: "gcr.io/cloud-builders/docker"
args: ["push", "gcr.io/$PROJECT_ID/reader"]
- name: "gcr.io/cloud-builders/gcloud"
args:
- "run"
- "deploy"
- "reader"
- "--image"
- "gcr.io/$PROJECT_ID/reader"
- "--region"
- "us-central1"
- "--memory"
- "2Gi"
- "--timeout"
- "60"
import * as functions from "@google-cloud/functions-framework";
import { scrape } from "@vakra-dev/reader";
// HTTP-triggered Cloud Function: POST { urls, formats } → scrape result JSON.
functions.http("scrape", async (req, res) => {
  const { urls, formats } = req.body;
  try {
    const result = await scrape({
      urls,
      formats,
      // Remote browser service; Cloud Functions cannot run Chrome directly.
      connectionToCore: process.env.BROWSERLESS_URL,
    });
    res.json(result);
  } catch (error: unknown) {
    // Narrow before reading .message — catch variables are not guaranteed Errors.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: message });
  }
});
| Platform | Max Memory | Max Timeout |
|---|---|---|
| AWS Lambda | 10 GB | 15 min |
| Vercel | 3 GB | 60 sec (Pro: 300s) |
| Google Cloud Functions | 16 GB | 60 min |
| Cloudflare Workers | 128 MB | 30 sec CPU time (wall-clock unbounded) |
// Optimize for serverless
const result = await scrape({
urls: [url], // Process one at a time
formats: ["markdown"], // Single format
timeoutMs: 30000,
connectionToCore: process.env.BROWSERLESS_URL,
});
// Batch URLs when possible
const result = await scrape({
urls: ["url1", "url2", "url3"], // Multiple URLs per invocation
batchConcurrency: 3,
});
import { createClient } from "@vercel/kv";
const kv = createClient({ /* config */ });
/** Scrape a URL, serving repeat requests from KV for up to one hour. */
async function cachedScrape(url: string) {
  const cacheKey = `scrape:${url}`;

  // Serve from cache when a fresh entry exists.
  const hit = await kv.get(cacheKey);
  if (hit) {
    return hit;
  }

  // Cache miss: scrape, then store the result with a 1-hour TTL.
  const fresh = await scrape({ urls: [url] });
  await kv.set(cacheKey, fresh, { ex: 3600 });
  return fresh;
}
// Keep connection warm
// Module-scope promise survives warm invocations, so the expensive connection
// is established at most once per container instance.
// `| undefined` is required: the original `let connectionPromise: Promise<any>;`
// is read before assignment, which is an error under strictNullChecks.
let connectionPromise: Promise<any> | undefined;

// Lazily create (and thereafter reuse) the connection promise.
function getConnection() {
  if (!connectionPromise) {
    connectionPromise = initializeConnection();
  }
  return connectionPromise;
}
- Reduce URL count per request
- Use remote browser service
- Increase function timeout
- Increase memory allocation
- Process fewer URLs
- Use streaming responses
- Use provisioned concurrency (AWS)
- Keep functions warm with scheduled pings
- Use remote browser (faster connection)
- Production Server - Traditional server setup
- Docker - Container deployment
- Troubleshooting - Common issues