mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-05-10 00:02:40 +02:00
Fix TypeScript errors in scraper
- Fix cheerio import to use named exports
- Add proper interfaces for JSON-LD data
- Fix type annotations for CheerioAPI

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
324d47d8b1
commit
93b6338e99
1 changed file with 41 additions and 27 deletions
|
|
@ -1,5 +1,5 @@
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
import * as cheerio from 'cheerio';
|
import { load, type CheerioAPI } from 'cheerio';
|
||||||
import {
|
import {
|
||||||
parsePrice,
|
parsePrice,
|
||||||
ParsedPrice,
|
ParsedPrice,
|
||||||
|
|
@ -77,7 +77,7 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await axios.get(url, {
|
const response = await axios.get<string>(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent':
|
'User-Agent':
|
||||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
|
|
@ -92,7 +92,7 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
|
||||||
maxRedirects: 5,
|
maxRedirects: 5,
|
||||||
});
|
});
|
||||||
|
|
||||||
const $ = cheerio.load(response.data);
|
const $ = load(response.data);
|
||||||
|
|
||||||
// Try to extract from JSON-LD structured data first
|
// Try to extract from JSON-LD structured data first
|
||||||
const jsonLdData = extractJsonLd($);
|
const jsonLdData = extractJsonLd($);
|
||||||
|
|
@ -131,8 +131,21 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface JsonLdProduct {
|
||||||
|
'@type'?: string;
|
||||||
|
'@graph'?: JsonLdProduct[];
|
||||||
|
name?: string;
|
||||||
|
image?: string | string[] | { url?: string };
|
||||||
|
offers?: JsonLdOffer | JsonLdOffer[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface JsonLdOffer {
|
||||||
|
price?: string | number;
|
||||||
|
priceCurrency?: string;
|
||||||
|
}
|
||||||
|
|
||||||
function extractJsonLd(
|
function extractJsonLd(
|
||||||
$: cheerio.CheerioAPI
|
$: CheerioAPI
|
||||||
): { name?: string; price?: ParsedPrice; image?: string } | null {
|
): { name?: string; price?: ParsedPrice; image?: string } | null {
|
||||||
try {
|
try {
|
||||||
const scripts = $('script[type="application/ld+json"]');
|
const scripts = $('script[type="application/ld+json"]');
|
||||||
|
|
@ -140,7 +153,7 @@ function extractJsonLd(
|
||||||
const content = $(scripts[i]).html();
|
const content = $(scripts[i]).html();
|
||||||
if (!content) continue;
|
if (!content) continue;
|
||||||
|
|
||||||
const data = JSON.parse(content);
|
const data = JSON.parse(content) as JsonLdProduct | JsonLdProduct[];
|
||||||
const product = findProduct(data);
|
const product = findProduct(data);
|
||||||
|
|
||||||
if (product) {
|
if (product) {
|
||||||
|
|
@ -155,49 +168,50 @@ function extractJsonLd(
|
||||||
const offer = Array.isArray(product.offers)
|
const offer = Array.isArray(product.offers)
|
||||||
? product.offers[0]
|
? product.offers[0]
|
||||||
: product.offers;
|
: product.offers;
|
||||||
if (offer.price) {
|
if (offer && offer.price) {
|
||||||
result.price = {
|
result.price = {
|
||||||
price: parseFloat(offer.price),
|
price: parseFloat(String(offer.price)),
|
||||||
currency: offer.priceCurrency || 'USD',
|
currency: offer.priceCurrency || 'USD',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (product.image) {
|
if (product.image) {
|
||||||
result.image = Array.isArray(product.image)
|
if (Array.isArray(product.image)) {
|
||||||
? product.image[0]
|
result.image = product.image[0];
|
||||||
: typeof product.image === 'string'
|
} else if (typeof product.image === 'string') {
|
||||||
? product.image
|
result.image = product.image;
|
||||||
: product.image.url;
|
} else if (product.image.url) {
|
||||||
|
result.image = product.image.url;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
} catch (_e) {
|
||||||
// JSON parse error, continue with other methods
|
// JSON parse error, continue with other methods
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function findProduct(data: unknown): Record<string, unknown> | null {
|
function findProduct(data: JsonLdProduct | JsonLdProduct[]): JsonLdProduct | null {
|
||||||
if (!data || typeof data !== 'object') return null;
|
if (!data) return null;
|
||||||
|
|
||||||
const obj = data as Record<string, unknown>;
|
|
||||||
|
|
||||||
if (obj['@type'] === 'Product') {
|
|
||||||
return obj;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Array.isArray(data)) {
|
if (Array.isArray(data)) {
|
||||||
for (const item of data) {
|
for (const item of data) {
|
||||||
const found = findProduct(item);
|
const found = findProduct(item);
|
||||||
if (found) return found;
|
if (found) return found;
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (obj['@graph'] && Array.isArray(obj['@graph'])) {
|
if (data['@type'] === 'Product') {
|
||||||
for (const item of obj['@graph']) {
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data['@graph'] && Array.isArray(data['@graph'])) {
|
||||||
|
for (const item of data['@graph']) {
|
||||||
const found = findProduct(item);
|
const found = findProduct(item);
|
||||||
if (found) return found;
|
if (found) return found;
|
||||||
}
|
}
|
||||||
|
|
@ -206,7 +220,7 @@ function findProduct(data: unknown): Record<string, unknown> | null {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null {
|
function extractPrice($: CheerioAPI): ParsedPrice | null {
|
||||||
const prices: ParsedPrice[] = [];
|
const prices: ParsedPrice[] = [];
|
||||||
|
|
||||||
for (const selector of priceSelectors) {
|
for (const selector of priceSelectors) {
|
||||||
|
|
@ -226,7 +240,7 @@ function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null {
|
||||||
return findMostLikelyPrice(prices);
|
return findMostLikelyPrice(prices);
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractName($: cheerio.CheerioAPI): string | null {
|
function extractName($: CheerioAPI): string | null {
|
||||||
for (const selector of nameSelectors) {
|
for (const selector of nameSelectors) {
|
||||||
const element = $(selector).first();
|
const element = $(selector).first();
|
||||||
if (element.length) {
|
if (element.length) {
|
||||||
|
|
@ -239,7 +253,7 @@ function extractName($: cheerio.CheerioAPI): string | null {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null {
|
function extractImage($: CheerioAPI, baseUrl: string): string | null {
|
||||||
for (const selector of imageSelectors) {
|
for (const selector of imageSelectors) {
|
||||||
const element = $(selector).first();
|
const element = $(selector).first();
|
||||||
if (element.length) {
|
if (element.length) {
|
||||||
|
|
@ -252,7 +266,7 @@ function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null {
|
||||||
// Handle relative URLs
|
// Handle relative URLs
|
||||||
try {
|
try {
|
||||||
return new URL(src, baseUrl).href;
|
return new URL(src, baseUrl).href;
|
||||||
} catch {
|
} catch (_e) {
|
||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue