Fix TypeScript errors in scraper

- Fix cheerio import to use named exports
- Add proper interfaces for JSON-LD data
- Fix type annotations for CheerioAPI

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-20 14:09:10 -05:00
parent 324d47d8b1
commit 93b6338e99

View file

@ -1,5 +1,5 @@
import axios from 'axios'; import axios from 'axios';
import * as cheerio from 'cheerio'; import { load, type CheerioAPI } from 'cheerio';
import { import {
parsePrice, parsePrice,
ParsedPrice, ParsedPrice,
@ -77,7 +77,7 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
}; };
try { try {
const response = await axios.get(url, { const response = await axios.get<string>(url, {
headers: { headers: {
'User-Agent': 'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
@ -92,7 +92,7 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
maxRedirects: 5, maxRedirects: 5,
}); });
const $ = cheerio.load(response.data); const $ = load(response.data);
// Try to extract from JSON-LD structured data first // Try to extract from JSON-LD structured data first
const jsonLdData = extractJsonLd($); const jsonLdData = extractJsonLd($);
@ -131,8 +131,21 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
return result; return result;
} }
/**
 * Shape of a JSON-LD node as consumed by the product scraper.
 *
 * Every member is optional: the value is produced by `JSON.parse` on page
 * markup, so none of these fields is guaranteed to be present.
 */
interface JsonLdProduct {
  /** Display name of the product. */
  name?: string;
  /** JSON-LD node type; the scraper looks for the literal `'Product'`. */
  '@type'?: string;
  /** Nested nodes that are searched recursively for a product entry. */
  '@graph'?: Array<JsonLdProduct>;
  /** A single URL, a list of URLs, or an object carrying a `url` field. */
  image?: string | Array<string> | { url?: string };
  /** One offer or a list of offers. */
  offers?: JsonLdOffer | Array<JsonLdOffer>;
}
/**
 * Pricing portion of a JSON-LD offer node.
 *
 * Both members are optional because the data is parsed from page markup.
 */
interface JsonLdOffer {
  /** Currency code accompanying the price. */
  priceCurrency?: string;
  /** Price value, which may be given as text or as a number. */
  price?: number | string;
}
function extractJsonLd( function extractJsonLd(
$: cheerio.CheerioAPI $: CheerioAPI
): { name?: string; price?: ParsedPrice; image?: string } | null { ): { name?: string; price?: ParsedPrice; image?: string } | null {
try { try {
const scripts = $('script[type="application/ld+json"]'); const scripts = $('script[type="application/ld+json"]');
@ -140,7 +153,7 @@ function extractJsonLd(
const content = $(scripts[i]).html(); const content = $(scripts[i]).html();
if (!content) continue; if (!content) continue;
const data = JSON.parse(content); const data = JSON.parse(content) as JsonLdProduct | JsonLdProduct[];
const product = findProduct(data); const product = findProduct(data);
if (product) { if (product) {
@ -155,49 +168,50 @@ function extractJsonLd(
const offer = Array.isArray(product.offers) const offer = Array.isArray(product.offers)
? product.offers[0] ? product.offers[0]
: product.offers; : product.offers;
if (offer.price) { if (offer && offer.price) {
result.price = { result.price = {
price: parseFloat(offer.price), price: parseFloat(String(offer.price)),
currency: offer.priceCurrency || 'USD', currency: offer.priceCurrency || 'USD',
}; };
} }
} }
if (product.image) { if (product.image) {
result.image = Array.isArray(product.image) if (Array.isArray(product.image)) {
? product.image[0] result.image = product.image[0];
: typeof product.image === 'string' } else if (typeof product.image === 'string') {
? product.image result.image = product.image;
: product.image.url; } else if (product.image.url) {
result.image = product.image.url;
}
} }
return result; return result;
} }
} }
} catch { } catch (_e) {
// JSON parse error, continue with other methods // JSON parse error, continue with other methods
} }
return null; return null;
} }
function findProduct(data: unknown): Record<string, unknown> | null { function findProduct(data: JsonLdProduct | JsonLdProduct[]): JsonLdProduct | null {
if (!data || typeof data !== 'object') return null; if (!data) return null;
const obj = data as Record<string, unknown>;
if (obj['@type'] === 'Product') {
return obj;
}
if (Array.isArray(data)) { if (Array.isArray(data)) {
for (const item of data) { for (const item of data) {
const found = findProduct(item); const found = findProduct(item);
if (found) return found; if (found) return found;
} }
return null;
} }
if (obj['@graph'] && Array.isArray(obj['@graph'])) { if (data['@type'] === 'Product') {
for (const item of obj['@graph']) { return data;
}
if (data['@graph'] && Array.isArray(data['@graph'])) {
for (const item of data['@graph']) {
const found = findProduct(item); const found = findProduct(item);
if (found) return found; if (found) return found;
} }
@ -206,7 +220,7 @@ function findProduct(data: unknown): Record<string, unknown> | null {
return null; return null;
} }
function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null { function extractPrice($: CheerioAPI): ParsedPrice | null {
const prices: ParsedPrice[] = []; const prices: ParsedPrice[] = [];
for (const selector of priceSelectors) { for (const selector of priceSelectors) {
@ -226,7 +240,7 @@ function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null {
return findMostLikelyPrice(prices); return findMostLikelyPrice(prices);
} }
function extractName($: cheerio.CheerioAPI): string | null { function extractName($: CheerioAPI): string | null {
for (const selector of nameSelectors) { for (const selector of nameSelectors) {
const element = $(selector).first(); const element = $(selector).first();
if (element.length) { if (element.length) {
@ -239,7 +253,7 @@ function extractName($: cheerio.CheerioAPI): string | null {
return null; return null;
} }
function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null { function extractImage($: CheerioAPI, baseUrl: string): string | null {
for (const selector of imageSelectors) { for (const selector of imageSelectors) {
const element = $(selector).first(); const element = $(selector).first();
if (element.length) { if (element.length) {
@ -252,7 +266,7 @@ function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null {
// Handle relative URLs // Handle relative URLs
try { try {
return new URL(src, baseUrl).href; return new URL(src, baseUrl).href;
} catch { } catch (_e) {
return src; return src;
} }
} }