mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-05-05 13:53:00 +02:00
Remove B&H Photo scraper (Cloudflare protection too strong)
B&H Photo Video uses aggressive Cloudflare protection that blocks headless browsers even with stealth plugins. Removing the site-specific scraper for now. The Puppeteer fallback remains in place for other sites with less aggressive protection. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
58ad638641
commit
c23cc8353a
1 changed file with 0 additions and 128 deletions
|
|
@ -628,134 +628,6 @@ const siteScrapers: SiteScraper[] = [
|
|||
},
|
||||
},
|
||||
|
||||
// B&H Photo Video
//
// Extraction order: schema.org JSON-LD first, then B&H-specific HTML
// selectors for whatever JSON-LD did not provide. Every step logs with a
// "[B&H]" prefix so a failed scrape can be diagnosed from the logs alone.
{
  match: (url) => /bhphotovideo\.com/i.test(url),
  /**
   * Extracts name, price, image URL and stock status from a B&H product page.
   *
   * @param $ - jQuery-style selector function for the fetched page
   *   (presumably Cheerio; confirm against the SiteScraper type).
   * @returns The scraped fields. Anything that could not be found stays
   *   `null`, or `'unknown'` for the stock status.
   */
  scrape: ($) => {
    let price: ParsedPrice | null = null;
    let name: string | null = null;
    let imageUrl: string | null = null;
    let stockStatus: StockStatus = 'unknown';

    // True for plain JSON objects (not null, not arrays).
    const isRecord = (value: unknown): value is Record<string, unknown> =>
      typeof value === 'object' && value !== null && !Array.isArray(value);

    // Flattens a parsed JSON-LD document into its object nodes. The top level
    // may be a single object, an array of objects, or a wrapper holding the
    // real nodes under `@graph`.
    const collectNodes = (
      value: unknown,
      out: Record<string, unknown>[] = [],
    ): Record<string, unknown>[] => {
      if (Array.isArray(value)) {
        for (const item of value) collectNodes(item, out);
      } else if (isRecord(value)) {
        out.push(value);
        collectNodes(value['@graph'], out);
      }
      return out;
    };

    // JSON-LD `image` may be a URL string, an object carrying `url`, or an
    // array of either. Returns the first usable URL, or null.
    const pickImageUrl = (value: unknown): string | null => {
      const first: unknown = Array.isArray(value) ? value[0] : value;
      if (typeof first === 'string') return first || null;
      if (isRecord(first)) {
        const url = first['url'];
        if (typeof url === 'string') return url || null;
      }
      return null;
    };

    // Converts a JSON-LD `price` (number or numeric string) to a positive,
    // finite amount. Returns null for anything else, so that a bogus value
    // such as "Call for price" does not block the HTML fallback below.
    const toAmount = (value: unknown): number | null => {
      if (typeof value !== 'number' && typeof value !== 'string') return null;
      const amount =
        typeof value === 'number'
          ? value
          : // Drop thousands separators ("1,299.00"), which parseFloat would
            // otherwise truncate to 1.
            parseFloat(value.replace(/,(?=\d{3}(?:\D|$))/g, ''));
      return Number.isFinite(amount) && amount > 0 ? amount : null;
    };

    // Debug: log page title and body length. This presumably helps to spot
    // when a bot-challenge page was served instead of the product page.
    const pageTitle = $('title').text();
    const bodyLength = $('body').html()?.length || 0;
    console.log(`[B&H] Page title: "${pageTitle}", body length: ${bodyLength}`);

    // Try to get data from JSON-LD first
    try {
      const scripts = $('script[type="application/ld+json"]');
      console.log(`[B&H] Found ${scripts.length} JSON-LD scripts`);
      scripts.each((_i, script) => {
        const content = $(script).html();
        if (!content) return;
        try {
          const parsed: unknown = JSON.parse(content);
          for (const node of collectNodes(parsed)) {
            const nodeType = node['@type'];
            console.log(`[B&H] JSON-LD type: ${String(nodeType)}`);

            // `@type` may be a single string or a list of types.
            const isProduct =
              nodeType === 'Product' ||
              (Array.isArray(nodeType) && nodeType.includes('Product'));
            if (!isProduct && !node['offers']) continue;

            const ldName = node['name'];
            if (!name && typeof ldName === 'string' && ldName.trim()) {
              name = ldName.trim();
              console.log(`[B&H] Found name: ${name}`);
            }

            if (!imageUrl) {
              imageUrl = pickImageUrl(node['image']);
            }

            const offers = node['offers'];
            const offer: unknown = Array.isArray(offers) ? offers[0] : offers;
            // `offer` is not an object when `offers` is missing or an empty array.
            if (!price && isRecord(offer)) {
              console.log(`[B&H] Offer data: ${JSON.stringify(offer).slice(0, 200)}`);

              const amount = toAmount(offer['price']);
              if (amount !== null) {
                const currency = offer['priceCurrency'];
                price = {
                  price: amount,
                  currency: typeof currency === 'string' && currency ? currency : 'USD',
                };
                console.log(`[B&H] Found price from JSON-LD: ${price.price}`);
              }

              // Check availability from JSON-LD. Values are typically
              // schema.org URLs such as "https://schema.org/InStock".
              const availability = offer['availability'];
              if (typeof availability === 'string') {
                const avail = availability.toLowerCase();
                if (avail.includes('instock')) {
                  stockStatus = 'in_stock';
                } else if (avail.includes('outofstock')) {
                  stockStatus = 'out_of_stock';
                }
              }
            }
          }
        } catch (err) {
          // A malformed script must not abort the scrape; log it and move on.
          const reason = err instanceof Error ? err.message : String(err);
          console.log(`[B&H] Skipping unparseable JSON-LD script: ${reason}`);
        }
      });
    } catch (err) {
      // Best effort: the HTML selectors below still get a chance.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(`[B&H] JSON-LD extraction failed: ${reason}`);
    }

    // Fallback to HTML selectors
    if (!price) {
      console.log(`[B&H] No price from JSON-LD, trying HTML selectors`);
      const priceSelectors = [
        '[data-selenium="pricingPrice"]',
        '[data-selenium="uppedDecimalPriceFirst"]',
        '.price_1DPoToKrLP1U',
        '[class*="price_"] span',
        '.priceInfo span[class*="price"]',
      ];

      for (const selector of priceSelectors) {
        const el = $(selector).first();
        console.log(`[B&H] Selector "${selector}": found ${el.length} elements`);
        if (el.length) {
          const text = el.text().trim();
          console.log(`[B&H] Element text: "${text.slice(0, 100)}"`);
          price = parsePrice(text);
          if (price) {
            console.log(`[B&H] Parsed price: ${price.price}`);
            break;
          }
        }
      }
    }

    // Try combining dollars and cents if still no price: strip all whitespace
    // from the price container so separately rendered parts join up.
    if (!price) {
      const priceContainer = $('[data-selenium="pricingPrice"]').first();
      if (priceContainer.length) {
        const fullText = priceContainer.text().replace(/\s+/g, '');
        price = parsePrice(fullText);
      }
    }

    if (!name) {
      // `.first()` prevents the text of several matching elements from being
      // concatenated into one name.
      name =
        $('h1[data-selenium="productTitle"]').first().text().trim() ||
        $('h1[class*="title_"]').first().text().trim() ||
        $('[data-selenium="productTitle"]').first().text().trim() ||
        null;
    }

    if (!imageUrl) {
      imageUrl =
        $('[data-selenium="mainImage"] img').attr('src') ||
        $('img[data-selenium="mainImage"]').attr('src') ||
        $('meta[property="og:image"]').attr('content') ||
        null;
    }

    // Stock status from HTML
    if (stockStatus === 'unknown') {
      if ($('[data-selenium="addToCartButton"]').length > 0) {
        stockStatus = 'in_stock';
      } else if ($('[data-selenium="notifyAvailabilityButton"]').length > 0) {
        stockStatus = 'out_of_stock';
      } else {
        // Only serialise the whole body text when no button decided it.
        const bodyText = $('body').text().toLowerCase();
        if (
          bodyText.includes('notify when available') ||
          bodyText.includes('temporarily unavailable')
        ) {
          stockStatus = 'out_of_stock';
        }
      }
    }

    console.log(`[B&H] Final result - name: ${name?.slice(0, 50)}, price: ${price?.price}, stock: ${stockStatus}`);
    return { name, price, imageUrl, stockStatus };
  },
},
|
||||
];
|
||||
|
||||
// Generic selectors as fallback
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue