Remove B&H Photo scraper (Cloudflare protection too strong)

B&H Photo Video uses aggressive Cloudflare protection that blocks
headless browsers even with stealth plugins. Removing the site-specific
scraper for now. The Puppeteer fallback remains in place for other
sites with less aggressive protection.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-21 21:17:14 -05:00
parent 58ad638641
commit c23cc8353a

View file

@ -628,134 +628,6 @@ const siteScrapers: SiteScraper[] = [
},
},
// B&H Photo Video
{
  match: (url) => /bhphotovideo\.com/i.test(url),
  /**
   * Extracts name, price, image URL and stock status from a B&H product page.
   *
   * Strategy, in order:
   *  1. JSON-LD (`<script type="application/ld+json">`) Product/Offer data.
   *  2. HTML selectors (`data-selenium` hooks and hashed price class names).
   *  3. For stock only: add-to-cart / notify buttons, then body-text phrases.
   *
   * Every step is best-effort: a failure in one source is logged and the next
   * source is tried. Fields that cannot be determined are returned as `null`
   * (or `'unknown'` for stock status).
   *
   * @param $ Document handle used for all selector queries on the loaded page.
   * @returns `{ name, price, imageUrl, stockStatus }` for the page.
   */
  scrape: ($) => {
    let price: ParsedPrice | null = null;
    let name: string | null = null;
    let imageUrl: string | null = null;
    let stockStatus: StockStatus = 'unknown';

    // Debug: Check page title and body length
    const pageTitle = $('title').text();
    const bodyLength = $('body').html()?.length || 0;
    console.log(`[B&H] Page title: "${pageTitle}", body length: ${bodyLength}`);

    // True for plain JSON objects (not arrays, not null).
    const isRecord = (value: unknown): value is Record<string, unknown> =>
      typeof value === 'object' && value !== null && !Array.isArray(value);

    // Flattens a parsed JSON-LD document into its candidate nodes. A document
    // may be a single object, a top-level array, or wrap nodes in `@graph`.
    const collectNodes = (
      data: unknown,
      out: Record<string, unknown>[] = [],
    ): Record<string, unknown>[] => {
      if (Array.isArray(data)) {
        for (const item of data) collectNodes(item, out);
      } else if (isRecord(data)) {
        out.push(data);
        collectNodes(data['@graph'], out);
      }
      return out;
    };

    // schema.org `image` may be a URL string, an ImageObject ({ url }),
    // or an array of either; always resolve it to a plain URL string.
    const firstImageUrl = (image: unknown): string | null => {
      const candidate = Array.isArray(image) ? image[0] : image;
      if (typeof candidate === 'string') return candidate || null;
      if (isRecord(candidate)) {
        const url = candidate['url'];
        return typeof url === 'string' && url ? url : null;
      }
      return null;
    };

    // Copies whatever a single JSON-LD node can contribute into the result
    // variables. Values found earlier are never overwritten.
    const readProductNode = (node: Record<string, unknown>): void => {
      const type = node['@type'];
      console.log(`[B&H] JSON-LD type: ${type}`);
      const isProduct =
        type === 'Product' || (Array.isArray(type) && type.some((t) => t === 'Product'));
      const offers = node['offers'];
      if (!isProduct && !offers) return;

      const productName = node['name'];
      if (!name && typeof productName === 'string' && productName) {
        name = productName;
        console.log(`[B&H] Found name: ${productName}`);
      }

      if (!imageUrl && node['image']) {
        imageUrl = firstImageUrl(node['image']);
      }

      if (offers && !price) {
        const offer = Array.isArray(offers) ? offers[0] : offers;
        if (isRecord(offer)) {
          console.log(`[B&H] Offer data: ${JSON.stringify(offer).slice(0, 200)}`);

          // Strip thousands separators so "1,299.00" is not truncated to 1, and
          // reject NaN / non-positive values so a bad JSON-LD price cannot
          // suppress the HTML-selector fallback below.
          const amount = parseFloat(String(offer['price'] ?? '').replace(/,/g, ''));
          if (Number.isFinite(amount) && amount > 0) {
            const currency = offer['priceCurrency'];
            price = {
              price: amount,
              currency: typeof currency === 'string' && currency ? currency : 'USD',
            };
            console.log(`[B&H] Found price from JSON-LD: ${amount}`);
          }

          // Check availability from JSON-LD (e.g. "https://schema.org/InStock")
          const availability = offer['availability'];
          if (typeof availability === 'string') {
            const avail = availability.toLowerCase();
            if (avail.includes('instock')) {
              stockStatus = 'in_stock';
            } else if (avail.includes('outofstock')) {
              stockStatus = 'out_of_stock';
            }
          }
        }
      }
    };

    // Try to get data from JSON-LD first
    try {
      const scripts = $('script[type="application/ld+json"]');
      console.log(`[B&H] Found ${scripts.length} JSON-LD scripts`);
      scripts.each((_i, script) => {
        const content = $(script).html();
        if (!content) return;
        try {
          for (const node of collectNodes(JSON.parse(content))) {
            readProductNode(node);
          }
        } catch (e) {
          // Malformed JSON-LD is common; note it and move on to the next script.
          console.log(`[B&H] Ignoring JSON-LD block: ${e instanceof Error ? e.message : String(e)}`);
        }
      });
    } catch (e) {
      // JSON-LD extraction as a whole failed; the HTML fallbacks below still run.
      console.log(`[B&H] JSON-LD extraction failed: ${e instanceof Error ? e.message : String(e)}`);
    }

    // Fallback to HTML selectors
    if (!price) {
      console.log(`[B&H] No price from JSON-LD, trying HTML selectors`);
      const priceSelectors = [
        '[data-selenium="pricingPrice"]',
        '[data-selenium="uppedDecimalPriceFirst"]',
        '.price_1DPoToKrLP1U',
        '[class*="price_"] span',
        '.priceInfo span[class*="price"]',
      ];
      for (const selector of priceSelectors) {
        const el = $(selector).first();
        console.log(`[B&H] Selector "${selector}": found ${el.length} elements`);
        if (el.length) {
          const text = el.text().trim();
          console.log(`[B&H] Element text: "${text.slice(0, 100)}"`);
          price = parsePrice(text);
          if (price) {
            console.log(`[B&H] Parsed price: ${price.price}`);
            break;
          }
        }
      }
    }

    // Try combining dollars and cents if still no price: the price container
    // may split them across child elements, so retry with all whitespace removed.
    if (!price) {
      const priceContainer = $('[data-selenium="pricingPrice"]').first();
      if (priceContainer.length) {
        const fullText = priceContainer.text().replace(/\s+/g, '');
        price = parsePrice(fullText);
      }
    }

    if (!name) {
      name = $('h1[data-selenium="productTitle"]').text().trim() ||
        $('h1[class*="title_"]').text().trim() ||
        $('[data-selenium="productTitle"]').text().trim() ||
        null;
    }

    if (!imageUrl) {
      imageUrl = $('[data-selenium="mainImage"] img').attr('src') ||
        $('img[data-selenium="mainImage"]').attr('src') ||
        $('meta[property="og:image"]').attr('content') ||
        null;
    }

    // Stock status from HTML. Cheap button lookups go first; the full body
    // text is only extracted when neither button decides the outcome.
    if (stockStatus === 'unknown') {
      if ($('[data-selenium="addToCartButton"]').length > 0) {
        stockStatus = 'in_stock';
      } else if ($('[data-selenium="notifyAvailabilityButton"]').length > 0) {
        stockStatus = 'out_of_stock';
      } else {
        const bodyText = $('body').text().toLowerCase();
        if (bodyText.includes('notify when available') ||
            bodyText.includes('temporarily unavailable')) {
          stockStatus = 'out_of_stock';
        }
      }
    }

    console.log(`[B&H] Final result - name: ${name?.slice(0, 50)}, price: ${price?.price}, stock: ${stockStatus}`);
    return { name, price, imageUrl, stockStatus };
  },
},
];
// Generic selectors as fallback