From 25936f4c9d608941182b896d4040ed7c3e2b4c43 Mon Sep 17 00:00:00 2001 From: clucraft Date: Fri, 23 Jan 2026 13:21:07 -0500 Subject: [PATCH] Add Puppeteer fallback for JavaScript-rendered prices - If no price found in static HTML, automatically try headless browser - Re-runs all extraction methods on browser-rendered HTML - Fixes price extraction for Magento, React, Vue, and other JS-heavy sites - AI extraction now also benefits from rendered HTML Co-Authored-By: Claude Opus 4.5 --- backend/src/services/scraper.ts | 57 +++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index 2de96d2..443a59f 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -822,6 +822,63 @@ export async function scrapeProduct(url: string, userId?: number): Promise s.match(url)); + if (siteScraper) { + const siteResult = siteScraper.scrape($browser, url); + if (!result.name && siteResult.name) result.name = siteResult.name; + if (!result.price && siteResult.price) result.price = siteResult.price; + if (!result.imageUrl && siteResult.imageUrl) result.imageUrl = siteResult.imageUrl; + if (result.stockStatus === 'unknown' && siteResult.stockStatus) { + result.stockStatus = siteResult.stockStatus; + } + } + + // Try JSON-LD from browser-rendered HTML + if (!result.price) { + const jsonLdData = extractJsonLd($browser); + if (jsonLdData) { + if (!result.name && jsonLdData.name) result.name = jsonLdData.name; + if (!result.price && jsonLdData.price) result.price = jsonLdData.price; + if (!result.imageUrl && jsonLdData.image) result.imageUrl = jsonLdData.image; + if (result.stockStatus === 'unknown' && jsonLdData.stockStatus) { + result.stockStatus = jsonLdData.stockStatus; + } + } + } + + // Try generic extraction from browser-rendered HTML + if (!result.price) { + result.price = extractGenericPrice($browser); + } + if (!result.name) { + result.name = extractGenericName($browser); + } + if (!result.imageUrl) { + result.imageUrl = extractGenericImage($browser, url); + } + if (result.stockStatus === 'unknown') { + result.stockStatus = extractGenericStockStatus($browser); + } + + if (result.price) { + console.log(`[Scraper] Successfully extracted price ${result.price.price} ${result.price.currency} using headless browser`); + } + } catch (browserError) { + console.error(`[Scraper] Browser fallback failed for ${url}:`, browserError); + } + } + // If we have a price and userId is provided, try AI verification if (result.price && userId && html) { try {