From c23cc8353a7e9498635790009d5d932c2e373d23 Mon Sep 17 00:00:00 2001 From: clucraft Date: Wed, 21 Jan 2026 21:17:14 -0500 Subject: [PATCH] Remove B&H Photo scraper (Cloudflare protection too strong) B&H Photo Video uses aggressive Cloudflare protection that blocks headless browsers even with stealth plugins. Removing the site-specific scraper for now. The Puppeteer fallback remains in place for other sites with less aggressive protection. Co-Authored-By: Claude Opus 4.5 --- backend/src/services/scraper.ts | 128 -------------------------------- 1 file changed, 128 deletions(-) diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index d5c8a33..5cefef6 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -628,134 +628,6 @@ const siteScrapers: SiteScraper[] = [ }, }, - // B&H Photo Video - { - match: (url) => /bhphotovideo\.com/i.test(url), - scrape: ($) => { - let price: ParsedPrice | null = null; - let name: string | null = null; - let imageUrl: string | null = null; - let stockStatus: StockStatus = 'unknown'; - - // Debug: Check page title and body length - const pageTitle = $('title').text(); - const bodyLength = $('body').html()?.length || 0; - console.log(`[B&H] Page title: "${pageTitle}", body length: ${bodyLength}`); - - // Try to get data from JSON-LD first - try { - const scripts = $('script[type="application/ld+json"]'); - console.log(`[B&H] Found ${scripts.length} JSON-LD scripts`); - scripts.each((_i, script) => { - const content = $(script).html(); - if (!content) return; - try { - const data = JSON.parse(content); - console.log(`[B&H] JSON-LD type: ${data['@type']}`); - if (data['@type'] === 'Product' || data.offers) { - if (data.name && !name) { - name = data.name; - console.log(`[B&H] Found name: ${name}`); - } - if (data.image && !imageUrl) { - imageUrl = Array.isArray(data.image) ? data.image[0] : data.image; - } - if (data.offers && !price) { - const offer = Array.isArray(data.offers) ? data.offers[0] : data.offers; - console.log(`[B&H] Offer data: ${JSON.stringify(offer).slice(0, 200)}`); - if (offer.price) { - price = { - price: parseFloat(String(offer.price)), - currency: offer.priceCurrency || 'USD', - }; - console.log(`[B&H] Found price from JSON-LD: ${price.price}`); - } - // Check availability from JSON-LD - if (offer.availability) { - const avail = offer.availability.toLowerCase(); - if (avail.includes('instock')) { - stockStatus = 'in_stock'; - } else if (avail.includes('outofstock')) { - stockStatus = 'out_of_stock'; - } - } - } - } - } catch (_e) { - // JSON-LD parse error, continue - } - }); - } catch (_e) { - // JSON-LD extraction error, continue - } - - // Fallback to HTML selectors - if (!price) { - console.log(`[B&H] No price from JSON-LD, trying HTML selectors`); - const priceSelectors = [ - '[data-selenium="pricingPrice"]', - '[data-selenium="uppedDecimalPriceFirst"]', - '.price_1DPoToKrLP1U', - '[class*="price_"] span', - '.priceInfo span[class*="price"]', - ]; - - for (const selector of priceSelectors) { - const el = $(selector).first(); - console.log(`[B&H] Selector "${selector}": found ${el.length} elements`); - if (el.length) { - const text = el.text().trim(); - console.log(`[B&H] Element text: "${text.slice(0, 100)}"`); - price = parsePrice(text); - if (price) { - console.log(`[B&H] Parsed price: ${price.price}`); - break; - } - } - } - } - - // Try combining dollars and cents if still no price - if (!price) { - const priceContainer = $('[data-selenium="pricingPrice"]').first(); - if (priceContainer.length) { - const fullText = priceContainer.text().replace(/\s+/g, ''); - price = parsePrice(fullText); - } - } - - if (!name) { - name = $('h1[data-selenium="productTitle"]').text().trim() || - $('h1[class*="title_"]').text().trim() || - $('[data-selenium="productTitle"]').text().trim() || - null; - } - - if (!imageUrl) { - imageUrl = $('[data-selenium="mainImage"] img').attr('src') || - $('img[data-selenium="mainImage"]').attr('src') || - $('meta[property="og:image"]').attr('content') || - null; - } - - // Stock status from HTML - if (stockStatus === 'unknown') { - const addToCartBtn = $('[data-selenium="addToCartButton"]').length > 0; - const notifyBtn = $('[data-selenium="notifyAvailabilityButton"]').length > 0; - const outOfStockText = $('body').text().toLowerCase(); - - if (addToCartBtn) { - stockStatus = 'in_stock'; - } else if (notifyBtn || outOfStockText.includes('notify when available') || - outOfStockText.includes('temporarily unavailable')) { - stockStatus = 'out_of_stock'; - } - } - - console.log(`[B&H] Final result - name: ${name?.slice(0, 50)}, price: ${price?.price}, stock: ${stockStatus}`); - return { name, price, imageUrl, stockStatus }; - }, - }, ]; // Generic selectors as fallback