mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-05-15 10:52:36 +02:00
Fix Amazon scraper picking up coupon prices instead of product price
- Add detection for coupon/savings containers and skip prices within them
- Check parent elements for coupon-related IDs, classes, and text
- Add minimum price threshold of $2 (coupons are typically $1-5)
- Add fallback to parse Amazon's whole/fraction price format directly
- Increase findMostLikelyPrice threshold from $0.99 to $5

This fixes the issue where $1 coupon savings were being scraped instead of the actual $25.99 product price.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
ba9e52b90f
commit
bf111e13d8
2 changed files with 105 additions and 22 deletions
|
|
@ -24,26 +24,101 @@ const siteScrapers: SiteScraper[] = [
|
|||
{
|
||||
match: (url) => /amazon\.(com|co\.uk|ca|de|fr|es|it|co\.jp|in|com\.au)/i.test(url),
|
||||
scrape: ($) => {
|
||||
// Price selectors in order of preference (sale price first)
|
||||
const priceSelectors = [
|
||||
'#corePrice_feature_div .a-price .a-offscreen',
|
||||
'#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
|
||||
'#priceblock_dealprice',
|
||||
'#priceblock_saleprice',
|
||||
'#priceblock_ourprice',
|
||||
'.a-price .a-offscreen',
|
||||
'#price_inside_buybox',
|
||||
'#newBuyBoxPrice',
|
||||
'span[data-a-color="price"] .a-offscreen',
|
||||
// Helper to check if element is inside a coupon/savings container
|
||||
const isInCouponContainer = (el: ReturnType<typeof $>) => {
|
||||
const parents = el.parents().toArray();
|
||||
for (const parent of parents) {
|
||||
const id = $(parent).attr('id') || '';
|
||||
const className = $(parent).attr('class') || '';
|
||||
const text = $(parent).text().toLowerCase();
|
||||
if (/coupon|savings|save\s*\$|clipcoupon|promoprice/i.test(id + className)) {
|
||||
return true;
|
||||
}
|
||||
// Check if the immediate container mentions "save" or "coupon"
|
||||
if (text.includes('save $') || text.includes('coupon') || text.includes('clip')) {
|
||||
// Only consider it a coupon if it's a small container
|
||||
if (text.length < 100) return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Try to get the main displayed price from specific containers first
|
||||
// These are the primary price display areas on Amazon
|
||||
const primaryPriceContainers = [
|
||||
'#corePrice_feature_div',
|
||||
'#corePriceDisplay_desktop_feature_div',
|
||||
'#apex_desktop_newAccordionRow',
|
||||
'#apex_offerDisplay_desktop',
|
||||
];
|
||||
|
||||
let price: ParsedPrice | null = null;
|
||||
for (const selector of priceSelectors) {
|
||||
const el = $(selector).first();
|
||||
if (el.length) {
|
||||
|
||||
// First, try the primary price containers
|
||||
for (const containerId of primaryPriceContainers) {
|
||||
const container = $(containerId);
|
||||
if (!container.length) continue;
|
||||
|
||||
// Look for the main price display (not savings/coupons)
|
||||
const priceElements = container.find('.a-price .a-offscreen');
|
||||
|
||||
for (let i = 0; i < priceElements.length; i++) {
|
||||
const el = $(priceElements[i]);
|
||||
|
||||
// Skip if this is inside a coupon container
|
||||
if (isInCouponContainer(el)) continue;
|
||||
|
||||
// Skip if the parent has "savings" or similar class
|
||||
const parentClass = el.parent().attr('class') || '';
|
||||
if (/savings|coupon|save/i.test(parentClass)) continue;
|
||||
|
||||
const text = el.text().trim();
|
||||
price = parsePrice(text);
|
||||
if (price) break;
|
||||
const parsed = parsePrice(text);
|
||||
|
||||
// Validate the price is reasonable (not a $1 coupon)
|
||||
if (parsed && parsed.price >= 2) {
|
||||
price = parsed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (price) break;
|
||||
}
|
||||
|
||||
// Fallback: try other known price selectors
|
||||
if (!price) {
|
||||
const fallbackSelectors = [
|
||||
'#priceblock_dealprice',
|
||||
'#priceblock_saleprice',
|
||||
'#priceblock_ourprice',
|
||||
'#price_inside_buybox',
|
||||
'#newBuyBoxPrice',
|
||||
'span[data-a-color="price"] .a-offscreen',
|
||||
];
|
||||
|
||||
for (const selector of fallbackSelectors) {
|
||||
const el = $(selector).first();
|
||||
if (el.length && !isInCouponContainer(el)) {
|
||||
const text = el.text().trim();
|
||||
const parsed = parsePrice(text);
|
||||
if (parsed && parsed.price >= 2) {
|
||||
price = parsed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Last resort: look for the whole/fraction price format
|
||||
if (!price) {
|
||||
const whole = $('#corePrice_feature_div .a-price-whole').first().text().replace(',', '');
|
||||
const fraction = $('#corePrice_feature_div .a-price-fraction').first().text();
|
||||
if (whole) {
|
||||
const priceStr = `$${whole}${fraction ? '.' + fraction : ''}`;
|
||||
const parsed = parsePrice(priceStr);
|
||||
if (parsed && parsed.price >= 2) {
|
||||
price = parsed;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -107,15 +107,23 @@ export function findMostLikelyPrice(prices: ParsedPrice[]): ParsedPrice | null {
|
|||
if (prices.length === 0) return null;
|
||||
if (prices.length === 1) return prices[0];
|
||||
|
||||
// Filter out very small prices (likely not product prices)
|
||||
const validPrices = prices.filter((p) => p.price >= 0.99);
|
||||
// Filter out very small prices (likely coupons, savings amounts, not actual product prices)
|
||||
// Most real products cost at least $2-3, and coupon amounts are often $1-5
|
||||
const validPrices = prices.filter((p) => p.price >= 5);
|
||||
|
||||
if (validPrices.length === 0) return prices[0];
|
||||
// If no prices above $5, try with a lower threshold but above typical coupon amounts
|
||||
if (validPrices.length === 0) {
|
||||
const lowThresholdPrices = prices.filter((p) => p.price >= 2);
|
||||
if (lowThresholdPrices.length > 0) {
|
||||
lowThresholdPrices.sort((a, b) => a.price - b.price);
|
||||
return lowThresholdPrices[0];
|
||||
}
|
||||
// Fall back to original list if nothing matches
|
||||
return prices[0];
|
||||
}
|
||||
|
||||
// Sort by price and pick the middle one (often the actual price)
|
||||
// This helps avoid picking shipping costs or discounts
|
||||
// Sort by price - the lowest valid price is often the sale/current price
|
||||
validPrices.sort((a, b) => a.price - b.price);
|
||||
|
||||
// Return the first (lowest) valid price - often the current/sale price
|
||||
return validPrices[0];
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue