diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index 2de96d2..443a59f 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -822,6 +822,63 @@ export async function scrapeProduct(url: string, userId?: number): Promise s.match(url)); + if (siteScraper) { + const siteResult = siteScraper.scrape($browser, url); + if (!result.name && siteResult.name) result.name = siteResult.name; + if (!result.price && siteResult.price) result.price = siteResult.price; + if (!result.imageUrl && siteResult.imageUrl) result.imageUrl = siteResult.imageUrl; + if (result.stockStatus === 'unknown' && siteResult.stockStatus) { + result.stockStatus = siteResult.stockStatus; + } + } + + // Try JSON-LD from browser-rendered HTML + if (!result.price) { + const jsonLdData = extractJsonLd($browser); + if (jsonLdData) { + if (!result.name && jsonLdData.name) result.name = jsonLdData.name; + if (!result.price && jsonLdData.price) result.price = jsonLdData.price; + if (!result.imageUrl && jsonLdData.image) result.imageUrl = jsonLdData.image; + if (result.stockStatus === 'unknown' && jsonLdData.stockStatus) { + result.stockStatus = jsonLdData.stockStatus; + } + } + } + + // Try generic extraction from browser-rendered HTML + if (!result.price) { + result.price = extractGenericPrice($browser); + } + if (!result.name) { + result.name = extractGenericName($browser); + } + if (!result.imageUrl) { + result.imageUrl = extractGenericImage($browser, url); + } + if (result.stockStatus === 'unknown') { + result.stockStatus = extractGenericStockStatus($browser); + } + + if (result.price) { + console.log(`[Scraper] Successfully extracted price ${result.price.price} ${result.price.currency} using headless browser`); + } + } catch (browserError) { + console.error(`[Scraper] Browser fallback failed for ${url}:`, browserError); + } + } + // If we have a price and userId is provided, try AI verification if (result.price && userId && html) { try {