Best Practices
Follow these best practices to build robust, efficient, and secure integrations with the WebPeek API. These guidelines will help you optimize performance, reduce costs, and provide a better experience for your users.
Security
Never Expose API Keys in Client-Side Code
API keys should only be used in server-side code. Exposing them in frontend JavaScript makes them publicly accessible and can lead to unauthorized usage.
❌ DON'T - Client-side
// Bad: API key exposed in browser
fetch('https://api.webpeek.dev/metadata?url=' + url, {
headers: {
'X-API-Key': 'wpk_live_abc123' // ⚠️ Exposed!
}
})

✅ DO - Server-side proxy
// Good: Call your backend API
fetch('/api/metadata?url=' + url)
// Backend handles WebPeek API
app.get('/api/metadata', async (req, res) => {
const data = await fetchWebPeek(req.query.url);
res.json(data);
});

Store API Keys Securely
Use environment variables or secure secret management systems. Never commit API keys to version control.
# .env file (add to .gitignore)
WEBPEEK_API_KEY=wpk_live_your_key_here
# .gitignore
.env
.env.local
.env.*.local

// Load from environment
require('dotenv').config();
const apiKey = process.env.WEBPEEK_API_KEY;
if (!apiKey) {
throw new Error('WEBPEEK_API_KEY not configured');
}

Validate User Input
Always validate and sanitize URLs before sending them to the API to prevent injection attacks and reduce errors.
// Validate a user-supplied URL before forwarding it to the API.
// Returns { valid: true, url } on success, or { valid: false, error } with a
// user-safe message on failure. Blocks non-HTTP(S) schemes and private/local
// addresses to mitigate SSRF against internal services.
function validateUrl(input) {
  try {
    const url = new URL(input);
    // Only allow http and https
    if (!['http:', 'https:'].includes(url.protocol)) {
      return { valid: false, error: 'Only HTTP/HTTPS URLs allowed' };
    }
    // Block localhost, loopback, link-local, and private (RFC 1918) ranges.
    // 169.254.0.0/16 covers cloud metadata endpoints (e.g. 169.254.169.254);
    // 172.16.0.0/12 spans 172.16.x – 172.31.x; [::1] is IPv6 loopback
    // (URL.hostname keeps the brackets for IPv6 literals).
    const hostname = url.hostname.toLowerCase();
    const isPrivate =
      hostname === 'localhost' ||
      hostname === '0.0.0.0' ||
      hostname === '[::1]' ||
      hostname === '[::]' ||
      hostname.startsWith('127.') ||
      hostname.startsWith('10.') ||
      hostname.startsWith('192.168.') ||
      hostname.startsWith('169.254.') ||
      /^172\.(1[6-9]|2\d|3[01])\./.test(hostname);
    if (isPrivate) {
      return { valid: false, error: 'Private/local URLs not allowed' };
    }
    return { valid: true, url: url.toString() };
  } catch (error) {
    // new URL() throws on malformed input
    return { valid: false, error: 'Invalid URL format' };
  }
}
// Usage
const validation = validateUrl(userInput);
if (!validation.valid) {
return res.status(400).json({ error: validation.error });
}
// Safe to use
const metadata = await fetchWebPeek(validation.url);

Implement Rate Limiting on Your End
Protect your API keys from abuse by implementing rate limiting on your proxy endpoints.
import rateLimit from 'express-rate-limit';
const limiter = rateLimit({
windowMs: 15 * 60 * 1000, // 15 minutes
max: 100, // Limit each IP to 100 requests per windowMs
message: 'Too many requests from this IP'
});
// Apply to your proxy endpoint
app.use('/api/metadata', limiter);

Performance & Efficiency
Implement Caching
Cache API responses to reduce redundant requests, improve response times, and lower costs.
import NodeCache from 'node-cache';
// Cache for 24 hours
const cache = new NodeCache({ stdTTL: 86400 });
// Return metadata for `url`, serving from the in-memory cache when possible
// and falling back to a live API fetch (which is then cached) on a miss.
async function getCachedMetadata(url) {
  const hit = cache.get(url);
  if (hit) {
    console.log('Cache hit:', url);
    return hit;
  }
  console.log('Cache miss, fetching:', url);
  const fresh = await fetchWebPeek(url);
  cache.set(url, fresh);
  return fresh;
}
// Usage
const metadata = await getCachedMetadata('https://github.com');

Use Redis for Distributed Caching
For production applications with multiple instances, use Redis for shared caching.
import Redis from 'ioredis';
const redis = new Redis(process.env.REDIS_URL);
async function getCachedMetadata(url) {
// Try cache
const cached = await redis.get(`webpeek:metadata:${url}`);
if (cached) {
return JSON.parse(cached);
}
// Fetch from API
const data = await fetchWebPeek(url);
// Cache for 24 hours
await redis.setex(
`webpeek:metadata:${url}`,
86400,
JSON.stringify(data)
);
return data;
}Batch Process When Possible
When processing multiple URLs, use queues and batch processing to stay within rate limits.
import PQueue from 'p-queue';
// Process with concurrency limit
const queue = new PQueue({
concurrency: 5, // Max 5 concurrent requests
interval: 60000, // Per minute
intervalCap: 100 // Max 100 requests per minute
});
// Fetch metadata for every URL through the shared rate-limited queue,
// resolving to results in the same order as the input array.
async function processUrls(urls) {
  const tasks = [];
  for (const url of urls) {
    tasks.push(queue.add(() => fetchWebPeek(url)));
  }
  return Promise.all(tasks);
}
// Process large batch
const urls = ['https://github.com', 'https://stripe.com', ...];
const results = await processUrls(urls);

Use Webhooks for Long-Running Operations
For resource-intensive endpoints like Performance, consider implementing a webhook pattern instead of synchronous requests.
// Queue job for background processing
async function queuePerformanceAudit(url, userId) {
const jobId = generateId();
// Store job
await db.jobs.create({
id: jobId,
url,
userId,
status: 'pending'
});
// Process in background
processInBackground(jobId, url);
return { jobId, status: 'pending' };
}
async function processInBackground(jobId, url) {
try {
const data = await fetchWebPeek(url);
await db.jobs.update(jobId, {
status: 'completed',
data
});
// Notify user (webhook, email, etc.)
await notifyUser(jobId);
} catch (error) {
await db.jobs.update(jobId, {
status: 'failed',
error: error.message
});
}
}Monitor Rate Limit Headers
Track rate limit headers to proactively manage your request volume.
async function fetchWebPeek(url) {
const response = await fetch(
`https://api.webpeek.dev/metadata?url=${encodeURIComponent(url)}`,
{
headers: { 'X-API-Key': process.env.WEBPEEK_API_KEY }
}
);
// Log rate limit status
const remaining = response.headers.get('X-RateLimit-Remaining');
const limit = response.headers.get('X-RateLimit-Limit');
console.log(`Rate limit: ${remaining}/${limit} remaining`);
// Alert if running low
if (parseInt(remaining) < 10) {
console.warn('⚠️ Rate limit running low!');
// Send alert to monitoring system
}
return response.json();
}

Set Appropriate Timeouts
Different endpoints have different processing times. Set timeouts accordingly.
// Metadata endpoint - fast (500ms typical)
const metadataTimeout = 5000; // 5 seconds
// SEO Audit - moderate (2-5 seconds typical)
const seoTimeout = 15000; // 15 seconds
// Performance - slow (10-30 seconds typical)
const performanceTimeout = 45000; // 45 seconds
// Snapshot - variable (3-10 seconds typical)
const snapshotTimeout = 30000; // 30 seconds
async function fetchWithTimeout(url, endpoint, timeout) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
const response = await fetch(url, {
signal: controller.signal,
headers: { 'X-API-Key': process.env.WEBPEEK_API_KEY }
});
return response.json();
} finally {
clearTimeout(timeoutId);
}
}Error Handling
Implement Retry Logic with Exponential Backoff
Retry failed requests with exponential backoff for transient errors.
// Fetch `url`, retrying transient failures with exponential backoff
// (1s, 2s, 4s, ...). Intended behavior: retry 5xx and 429, fail fast on
// other 4xx.
// NOTE(review): the `throw` for non-retryable client errors is caught by
// this function's own catch block below, so 4xx responses are actually
// re-attempted (without delay) until the final try — confirm this is
// intended. Also, if every attempt ends in a retryable HTTP error, the
// loop exits and the function resolves to undefined instead of throwing;
// callers should tolerate that.
async function fetchWithRetry(url, maxRetries = 3) {
for (let i = 0; i < maxRetries; i++) {
try {
const response = await fetch(url);
// Success
if (response.ok) {
return await response.json();
}
// Don't retry client errors (except 429)
if (response.status >= 400 && response.status < 500 && response.status !== 429) {
throw new Error(`Client error: ${response.status}`);
}
// Retry server errors
if (i < maxRetries - 1) {
const delay = Math.pow(2, i) * 1000; // 1s, 2s, 4s
console.log(`Retrying after ${delay}ms...`);
await new Promise(resolve => setTimeout(resolve, delay));
}
} catch (error) {
// Network failures (and the thrown client error above) land here;
// only the last attempt's error propagates to the caller.
if (i === maxRetries - 1) throw error;
}
}
}Handle Specific Error Codes
Provide specific error handling for different error codes to improve user experience.
async function handleWebPeekError(error, url) {
switch (error.code) {
case 'URL_UNREACHABLE':
return {
message: 'The website could not be reached',
suggestion: 'Please verify the URL is correct and accessible'
};
case 'TIMEOUT':
return {
message: 'The website took too long to respond',
suggestion: 'This website may be slow. Try again later.'
};
case 'ROBOTS_TXT_BLOCKED':
return {
message: 'Access to this page is restricted',
suggestion: 'The website blocks automated access to this URL'
};
case 'RATE_LIMIT_EXCEEDED':
return {
message: 'Too many requests',
suggestion: `Please try again in ${error.details.reset_after} seconds`
};
default:
return {
message: 'An error occurred',
suggestion: 'Please try again later'
};
}
}Implement Graceful Degradation
Provide fallback behavior when the API is unavailable or returns errors.
// Fetch metadata with graceful degradation: prefer live WebPeek data, but
// on any failure return locally-derived placeholder values so the UI can
// still render. The `source` field tells callers which path was taken.
async function getMetadataWithFallback(url) {
  try {
    const response = await fetchWebPeek(url);
    return { source: 'webpeek', ...response.data };
  } catch (error) {
    console.warn('WebPeek API failed, using fallback:', error.message);
    const fallback = {
      source: 'fallback',
      title: extractTitleFromUrl(url),
      description: 'Description not available',
      image: '/placeholder.png',
      url
    };
    return fallback;
  }
}
function extractTitleFromUrl(url) {
try {
const hostname = new URL(url).hostname;
return hostname.replace('www.', '').split('.')[0];
} catch {
return 'Unknown';
}
}Log Errors for Debugging
Implement comprehensive error logging to help diagnose issues.
import winston from 'winston';
const logger = winston.createLogger({
level: 'info',
format: winston.format.json(),
transports: [
new winston.transports.File({ filename: 'error.log', level: 'error' }),
new winston.transports.File({ filename: 'combined.log' })
]
});
async function fetchWebPeek(url) {
try {
const response = await fetch(`https://api.webpeek.dev/metadata?url=${url}`);
const data = await response.json();
if (!response.ok) {
logger.error('WebPeek API error', {
url,
status: response.status,
code: data.error?.code,
message: data.error?.message,
requestId: data.error?.details?.request_id
});
throw new Error(data.error.message);
}
return data;
} catch (error) {
logger.error('WebPeek request failed', {
url,
error: error.message,
stack: error.stack
});
throw error;
}
}Data Handling
Validate Response Data
Always validate API responses before using them in your application.
// Validate the envelope of a WebPeek API response and unwrap its payload.
// Throws a descriptive Error when the response is malformed, unsuccessful,
// or missing required fields; otherwise returns `data.data`.
function validateMetadata(data) {
  const isObject = data !== null && typeof data === 'object';
  if (!isObject) {
    throw new Error('Invalid response format');
  }
  if (!data.success) {
    throw new Error(data.error?.message || 'Request failed');
  }
  if (!data.data?.url) {
    throw new Error('Missing required fields');
  }
  return data.data;
}
// Usage
try {
const response = await fetchWebPeek(url);
const metadata = validateMetadata(response);
// Safe to use metadata
} catch (error) {
console.error('Invalid response:', error);
}Sanitize Data Before Display
Sanitize titles, descriptions, and other text fields to prevent XSS attacks.
import DOMPurify from 'isomorphic-dompurify';
function sanitizeMetadata(metadata) {
return {
...metadata,
title: DOMPurify.sanitize(metadata.title || ''),
description: DOMPurify.sanitize(metadata.description || ''),
// Keep URLs but validate them
image: isValidImageUrl(metadata.image) ? metadata.image : null,
url: metadata.url // Already validated before API call
};
}
function isValidImageUrl(url) {
if (!url) return false;
try {
const parsed = new URL(url);
return ['http:', 'https:'].includes(parsed.protocol);
} catch {
return false;
}
}Handle Missing Data Gracefully
Not all websites have complete metadata. Provide defaults for missing fields.
// Normalize raw metadata, supplying a sensible default for every field a
// site may omit. Open Graph values are preferred, then Twitter card values,
// then static placeholders.
function normalizeMetadata(data) {
  const og = data.og ?? {};
  const tw = data.twitter ?? {};
  return {
    title: data.title || og.title || 'Untitled',
    description: data.description || og.description || 'No description available',
    image: og.image || tw.image || '/default-og-image.png',
    url: data.url,
    // extractDomain is only consulted when the site name is absent
    siteName: og.site_name || extractDomain(data.url),
    type: og.type || 'website'
  };
}
function extractDomain(url) {
try {
return new URL(url).hostname.replace('www.', '');
} catch {
return 'Unknown';
}
}Store Historical Data
For monitoring use cases, store API responses with timestamps for trend analysis.
// Fetch fresh metadata for `url` and persist a timestamped snapshot so
// changes can be analyzed over time. Resolves with the raw API response.
async function trackMetadata(url) {
  const response = await fetchWebPeek(url);
  const record = {
    url,
    title: response.data.title,
    description: response.data.description,
    image: response.data.og?.image,
    fetchedAt: new Date(),
    // `cached` may be absent on fresh responses; store an explicit boolean
    cached: response.cached || false,
    processingTime: response.metadata?.processing_time_ms
  };
  await db.metadata.create(record);
  return response;
}
// Query historical data
async function getMetadataHistory(url, days = 30) {
const since = new Date();
since.setDate(since.getDate() - days);
return db.metadata.find({
url,
fetchedAt: { $gte: since }
}).sort({ fetchedAt: -1 });
}Monitoring & Observability
Track API Usage
Monitor your API usage to stay within limits and optimize costs.
import prometheus from 'prom-client';
// Create metrics
const apiCallCounter = new prometheus.Counter({
name: 'webpeek_api_calls_total',
help: 'Total WebPeek API calls',
labelNames: ['endpoint', 'status']
});
const apiDuration = new prometheus.Histogram({
name: 'webpeek_api_duration_seconds',
help: 'WebPeek API call duration',
labelNames: ['endpoint']
});
async function fetchWebPeek(url, endpoint = 'metadata') {
const timer = apiDuration.startTimer({ endpoint });
try {
const response = await fetch(`https://api.webpeek.dev/${endpoint}?url=${url}`);
const data = await response.json();
apiCallCounter.inc({
endpoint,
status: response.ok ? 'success' : 'error'
});
return data;
} catch (error) {
apiCallCounter.inc({ endpoint, status: 'error' });
throw error;
} finally {
timer();
}
}Set Up Alerts
Configure alerts for rate limit warnings, error spikes, and quota exhaustion.
// Poll the WebPeek usage endpoint and emit alerts as quota is consumed:
// a warning above 80% usage and an additional critical alert above 95%.
async function checkRateLimitStatus() {
  const response = await fetch('https://api.webpeek.dev/usage', {
    headers: { 'X-API-Key': process.env.WEBPEEK_API_KEY }
  });
  const body = await response.json();
  const usage = body.data.usage.total;
  const percentUsed = (usage.used / usage.limit) * 100;
  // Warning threshold: more than 80% of quota consumed
  if (percentUsed > 80) {
    await sendAlert({
      severity: 'warning',
      message: `WebPeek API usage at ${percentUsed.toFixed(1)}%`,
      details: {
        used: usage.used,
        limit: usage.limit,
        remaining: usage.remaining
      }
    });
  }
  // Critical threshold: more than 95% of quota consumed
  if (percentUsed > 95) {
    await sendAlert({
      severity: 'critical',
      message: 'WebPeek API quota nearly exhausted',
      details: {
        remaining: usage.remaining,
        resetDate: new Date(body.data.period.end)
      }
    });
  }
}
// Run check periodically
setInterval(checkRateLimitStatus, 3600000); // Every hourMonitor Response Times
Track API response times to identify performance issues.
async function fetchWithMetrics(url, endpoint) {
const startTime = Date.now();
try {
const data = await fetchWebPeek(url, endpoint);
const duration = Date.now() - startTime;
// Log slow requests
if (duration > 5000) {
logger.warn('Slow API response', {
endpoint,
url,
duration,
processingTime: data.metadata?.processing_time_ms
});
}
// Send to analytics
analytics.track('api_call', {
endpoint,
duration,
cached: data.cached,
success: true
});
return data;
} catch (error) {
const duration = Date.now() - startTime;
analytics.track('api_call', {
endpoint,
duration,
success: false,
error: error.message
});
throw error;
}
}Health Checks
Implement health check endpoints to monitor your integration status.
app.get('/health/webpeek', async (req, res) => {
try {
// Simple test request
const response = await fetch(
'https://api.webpeek.dev/metadata?url=https://example.com',
{
headers: { 'X-API-Key': process.env.WEBPEEK_API_KEY },
signal: AbortSignal.timeout(5000)
}
);
if (response.ok) {
const remaining = response.headers.get('X-RateLimit-Remaining');
res.json({
status: 'healthy',
rateLimitRemaining: remaining,
timestamp: new Date()
});
} else {
res.status(503).json({
status: 'unhealthy',
error: `HTTP ${response.status}`,
timestamp: new Date()
});
}
} catch (error) {
res.status(503).json({
status: 'unhealthy',
error: error.message,
timestamp: new Date()
});
}
});Testing
Use Test URLs
Use well-known, stable URLs for testing to ensure consistent results.
// Good test URLs (stable, fast, complete metadata)
const TEST_URLS = {
basic: 'https://example.com',
richMetadata: 'https://github.com',
largePage: 'https://wikipedia.org',
dynamic: 'https://twitter.com/github'
};
describe('WebPeek Integration', () => {
it('should fetch basic metadata', async () => {
const data = await fetchWebPeek(TEST_URLS.basic);
expect(data.success).toBe(true);
expect(data.data.url).toBe(TEST_URLS.basic);
});
});Mock API Responses in Tests
Mock API responses to avoid hitting rate limits during testing.
import nock from 'nock';
describe('Metadata fetching', () => {
beforeEach(() => {
// Mock WebPeek API
nock('https://api.webpeek.dev')
.get('/metadata')
.query({ url: 'https://github.com' })
.reply(200, {
success: true,
data: {
url: 'https://github.com',
title: 'GitHub',
description: 'Where the world builds software',
og: {
title: 'GitHub',
image: 'https://github.githubassets.com/images/og.png'
}
}
});
});
it('should parse metadata correctly', async () => {
const data = await fetchWebPeek('https://github.com');
expect(data.data.title).toBe('GitHub');
});
});Test Error Scenarios
Test how your application handles different error conditions.
describe('Error handling', () => {
it('should handle invalid URLs', async () => {
nock('https://api.webpeek.dev')
.get('/metadata')
.query(true)
.reply(400, {
success: false,
error: {
code: 'INVALID_URL',
message: 'The provided URL is invalid'
}
});
await expect(fetchWebPeek('invalid-url'))
.rejects.toThrow('The provided URL is invalid');
});
it('should handle rate limits', async () => {
nock('https://api.webpeek.dev')
.get('/metadata')
.query(true)
.reply(429, {
success: false,
error: {
code: 'RATE_LIMIT_EXCEEDED',
message: 'Rate limit exceeded',
details: { reset_after: 60 }
}
});
await expect(fetchWebPeek('https://github.com'))
.rejects.toThrow(/rate limit/i);
});
});Production Checklist
Before deploying to production, ensure you've implemented these critical practices:
API keys stored in environment variables, not in code
Never commit API keys to version control
Server-side proxy implemented (no client-side API calls)
Protect API keys from exposure in browser
Caching implemented (Redis, in-memory, or database)
Reduce costs and improve response times
Rate limit monitoring and alerts configured
Get notified before hitting limits
Error handling with retry logic implemented
Handle transient failures gracefully
Input validation for user-provided URLs
Prevent invalid requests and security issues
Appropriate timeouts set for each endpoint
Prevent hanging requests
Error logging and monitoring configured
Track issues in production
Response data validation and sanitization
Prevent XSS and handle missing data
Health check endpoint implemented
Monitor integration status
Tests written for success and error scenarios
Ensure reliability before deployment
Graceful degradation strategy in place
Maintain UX when API is unavailable
Endpoint-Specific Best Practices
Each WebPeek API endpoint has unique characteristics and optimization strategies. Follow these endpoint-specific best practices for optimal results.
Performance Endpoint
Choose the Right Device Type
Always test mobile performance since most traffic is mobile and Google uses mobile-first indexing.
// Recommended: Mobile-first testing
const metrics = await fetchPerformance('https://example.com', {
device: 'mobile' // Default and recommended
});
// Also test tablet and desktop for comprehensive coverage
const tabletMetrics = await fetchPerformance(url, { device: 'tablet' });
const desktopMetrics = await fetchPerformance(url, { device: 'desktop' });Select Appropriate Wait Conditions
Choose the right waitUntil condition based on your page type:
// Static sites - fast measurements
{ waitUntil: 'load', timeout: 10000 }
// Traditional server-rendered pages
{ waitUntil: 'load', timeout: 30000 }
// Modern SPAs with some async content (default)
{ waitUntil: 'networkidle2', timeout: 30000 }
// SPAs with heavy async content - most accurate
{ waitUntil: 'networkidle0', timeout: 45000 }Prioritize Core Web Vitals by Weight
Focus optimization efforts on the metrics that matter most:
- • LCP (35%) - Most important for perceived load speed
- • INP (35%) - Critical for interactivity
- • CLS (15%) - Important for visual stability
- • FCP (10%) - Secondary load metric
- • TTFB (5%) - Server optimization
Understand Caching Behavior
Results are cached for 24 hours. To bypass cache, change any parameter:
// These create DIFFERENT cache entries:
fetchPerformance('https://example.com', { device: 'mobile' })
fetchPerformance('https://example.com', { device: 'desktop' })
fetchPerformance('https://example.com', { device: 'mobile', timeout: 30001 })
// Check if response was cached
if (response.cache.cached) {
console.log(`Cached result, expires: ${response.cache.expires_at}`);
}SEO Audit Endpoint
Use Field Filtering for Performance
Request only the fields you need to minimize payload size and improve response times:
// Dashboard overview - minimal payload
const overview = await fetchSEO(url, {
fields: 'scoring,summary,diff'
});
// Developer tasks - actionable items
const tasks = await fetchSEO(url, {
fields: 'issues,metrics,web_vitals'
});
// Technical SEO focus
const technical = await fetchSEO(url, {
fields: 'indexability,http,structured_data'
});Enable Comparison for Progress Tracking
Use compare=true to track improvements over time and identify regressions:
const audit = await fetchSEO(url, {
device: 'mobile',
compare: true,
fields: 'scoring,summary,diff'
});
// Check for improvements or regressions
if (audit.diff) {
console.log(`Score change: ${audit.diff.score_delta > 0 ? '+' : ''}${audit.diff.score_delta}`);
console.log(`New issues: ${audit.diff.new_issues.length}`);
console.log(`Resolved: ${audit.diff.resolved_issues.length}`);
if (audit.diff.regressions > 0) {
console.warn(`⚠️ ${audit.diff.regressions} regressions detected!`);
}
}Prioritize Issue Fixes
Focus on critical issues first, then high-impact warnings:
const criticalIssues = audit.issues.filter(i => i.severity === 'critical');
const highImpactWarnings = audit.issues.filter(
i => i.severity === 'warning' && i.impact === 'high'
);
const quickWins = audit.issues.filter(i => i.estimated_effort === 'minutes');
// Fix priority order:
// 1. All critical issues
// 2. High impact warnings
// 3. Quick wins (minutes of effort)
// 4. Medium impact warnings
// 5. Low priority itemsMonitor Indexability Status
Always check indexability before optimizing SEO:
const { indexability } = audit;
switch (indexability.effective_status) {
case 'indexable':
console.log('✓ Page is indexable');
break;
case 'noindex':
console.error('✗ Page blocked from indexing (noindex)');
break;
case 'blocked':
console.error('✗ Robots.txt blocks access');
break;
case 'canonicalized':
console.warn('⚠️ Page canonicalized to another URL');
break;
}Snapshot Endpoint
Use WebP Format for Optimal File Size
WebP provides excellent compression with minimal quality loss:
// Best: WebP with good quality (default)
const snapshot = await captureSnapshot(url, {
format: 'webp',
quality: 'medium' // or 80
});
// Use PNG only when you need lossless or transparency
const pngSnapshot = await captureSnapshot(url, {
format: 'png',
omitBackground: true // Transparent background
});
// JPEG for broad compatibility (larger files)
const jpegSnapshot = await captureSnapshot(url, {
format: 'jpeg',
quality: 85
});Enable Ad Blocking for Consistency
Block ads and cookie banners for cleaner, more consistent screenshots:
const snapshot = await captureSnapshot(url, {
device: 'mobile',
blockAds: true,
blockCookieBanners: true,
format: 'webp',
quality: 'high'
});
// Benefits:
// • Visual consistency (no random ads)
// • Faster page load (30-50% reduction)
// • Smaller screenshot files
// • Better user experience
console.log(`Blocked ${snapshot.metadata.requests_blocked} requests`);Use Element Targeting for Performance
Capture specific elements instead of full pages when possible:
// Better: Capture just the header (faster, smaller file)
const header = await captureSnapshot(url, {
selector: '#header',
format: 'png',
omitBackground: true
});
// Less optimal: Capture full page and crop client-side
const fullPage = await captureSnapshot(url, {
fullPage: true
});
// Good use cases for selectors:
// • Social media cards (specific sections)
// • Navigation bars
// • Product images
// • Content sectionsHandle Tall Pages Carefully
Chrome has rendering limits around 16,384px. Handle tall pages appropriately:
try {
const snapshot = await captureSnapshot(url, {
fullPage: true
});
} catch (error) {
if (error.code === 'PAGE_TOO_TALL') {
// Option 1: Capture viewport only
const viewport = await captureSnapshot(url, { fullPage: false });
// Option 2: Reduce viewport width
const narrower = await captureSnapshot(url, {
fullPage: true,
viewport: { width: 1280, height: 1024 }
});
// Option 3: Capture specific sections
const section = await captureSnapshot(url, {
selector: '#main-content'
});
}
}Adjust Wait Times for Dynamic Content
Use waitFor and waitUntil for pages with animations or async content:
// For pages with animations
const animated = await captureSnapshot(url, {
waitFor: 2000, // Wait 2s after load
waitUntil: 'networkidle2'
});
// For SPAs with heavy async content
const spa = await captureSnapshot(url, {
waitFor: 3000,
waitUntil: 'networkidle0', // Strict network idle
timeout: 45000 // Longer timeout
});
// For fast static sites
const staticSite = await captureSnapshot(url, { // note: `static` is a reserved word and cannot be a variable name
waitUntil: 'load', // Just wait for load event
timeout: 10000 // Short timeout
});Metadata Endpoint
Use Field Filtering to Reduce Payload
Request only the metadata you need - can reduce payload by 90%:
// Social sharing preview - minimal payload (~20% of full)
const social = await fetchMetadata(url, {
fields: 'basic,og,twitter'
});
// SEO essentials
const seo = await fetchMetadata(url, {
fields: 'basic,indexability,schema'
});
// Technology detection
const tech = await fetchMetadata(url, {
fields: 'tier3' // dominant_color, social_handles, tech_stack
});
// Full metadata (default)
const full = await fetchMetadata(url); // or fields: 'all'Set Appropriate Limits for Arrays
Control response size with max_images and max_links parameters:
// Limit images and links for faster responses
const metadata = await fetchMetadata(url, {
max_images: 10, // Default: 25, Max: 100
max_links: 50 // Default: 100, Max: 500
});
// Check if arrays were truncated
if (metadata.truncated?.images) {
console.log('Image list was truncated');
}
if (metadata.truncated?.links) {
console.log('Link list was truncated');
}Validate Canonical URLs
Always check canonical metadata for SEO issues:
const { canonical } = metadata;
if (!canonical?.is_self_canonical) {
console.warn(`⚠️ Non-self-referencing canonical: ${canonical.href}`);
}
if (!canonical?.resolves) {
console.error('✗ Canonical URL does not resolve');
}
if (canonical?.status !== 200) {
console.error(`✗ Canonical returns HTTP ${canonical.status}`);
}
// Good canonical setup:
// ✓ is_self_canonical: true
// ✓ resolves: true
// ✓ status: 200Handle Missing Data Gracefully
Not all websites have complete metadata. Provide sensible defaults:
// Normalize raw metadata into a record with a default for every field, so
// consumers never have to null-check titles, images, author, or language.
function normalizeMetadata(data) {
  const og = data.og ?? {};
  // Derive the site name from the URL only when no og:site_name is present.
  const siteFromUrl = () => new URL(data.url).hostname.replace('www.', '');
  return {
    title: data.title || og.title || 'Untitled',
    description: data.description || og.description || 'No description available',
    image: og.image || data.twitter?.image || '/default-og-image.png',
    siteName: og.site_name || siteFromUrl(),
    author: data.author || 'Unknown',
    language: data.language || 'en'
  };
}
const normalized = normalizeMetadata(metadata);Check for Redirects
The API automatically follows redirects. Check final_url to detect them:
if (metadata.final_url !== metadata.url) {
console.log(`Redirected from: ${metadata.url}`);
console.log(`Redirected to: ${metadata.final_url}`);
// Check redirect chain
if (metadata.http?.redirect_chain?.length > 0) {
console.log('Redirect chain:', metadata.http.redirect_chain);
}
// Update your records with the final URL
await db.urls.update({
original: metadata.url,
final: metadata.final_url
});
}