From bf111e13d8781cf1ecc8130983bc04acd8342cf8 Mon Sep 17 00:00:00 2001 From: clucraft Date: Tue, 20 Jan 2026 20:46:17 -0500 Subject: [PATCH] Fix Amazon scraper picking up coupon prices instead of product price - Add detection for coupon/savings containers and skip prices within them - Check parent elements for coupon-related IDs, classes, and text - Add minimum price threshold of $2 to the Amazon scraper (coupons are typically $1-5) - Add fallback to parse Amazon's whole/fraction price format directly - Increase findMostLikelyPrice threshold from $0.99 to $5, falling back to $2 when no price reaches $5 This fixes the issue where $1 coupon savings were being scraped instead of the actual $25.99 product price. Co-Authored-By: Claude Opus 4.5 --- backend/src/services/scraper.ts | 107 ++++++++++++++++++++++++++----- backend/src/utils/priceParser.ts | 20 ++++-- 2 files changed, 105 insertions(+), 22 deletions(-) diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index 1c0fa2e..f9a6190 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -24,26 +24,101 @@ const siteScrapers: SiteScraper[] = [ { match: (url) => /amazon\.(com|co\.uk|ca|de|fr|es|it|co\.jp|in|com\.au)/i.test(url), scrape: ($) => { - // Price selectors in order of preference (sale price first) - const priceSelectors = [ - '#corePrice_feature_div .a-price .a-offscreen', - '#corePriceDisplay_desktop_feature_div .a-price .a-offscreen', - '#priceblock_dealprice', - '#priceblock_saleprice', - '#priceblock_ourprice', - '.a-price .a-offscreen', - '#price_inside_buybox', - '#newBuyBoxPrice', - 'span[data-a-color="price"] .a-offscreen', + // Helper to check if element is inside a coupon/savings container + const isInCouponContainer = (el: ReturnType) => { + const parents = el.parents().toArray(); + for (const parent of parents) { + const id = $(parent).attr('id') || ''; + const className = $(parent).attr('class') || ''; + const text = $(parent).text().toLowerCase(); + if
(/coupon|savings|save\s*\$|clipcoupon|promoprice/i.test(id + className)) { + return true; + } + // Check if the immediate container mentions "save" or "coupon" + if (text.includes('save $') || text.includes('coupon') || text.includes('clip')) { + // Only consider it a coupon if it's a small container + if (text.length < 100) return true; + } + } + return false; + }; + + // Try to get the main displayed price from specific containers first + // These are the primary price display areas on Amazon + const primaryPriceContainers = [ + '#corePrice_feature_div', + '#corePriceDisplay_desktop_feature_div', + '#apex_desktop_newAccordionRow', + '#apex_offerDisplay_desktop', ]; let price: ParsedPrice | null = null; - for (const selector of priceSelectors) { - const el = $(selector).first(); - if (el.length) { + + // First, try the primary price containers + for (const containerId of primaryPriceContainers) { + const container = $(containerId); + if (!container.length) continue; + + // Look for the main price display (not savings/coupons) + const priceElements = container.find('.a-price .a-offscreen'); + + for (let i = 0; i < priceElements.length; i++) { + const el = $(priceElements[i]); + + // Skip if this is inside a coupon container + if (isInCouponContainer(el)) continue; + + // Skip if the parent has "savings" or similar class + const parentClass = el.parent().attr('class') || ''; + if (/savings|coupon|save/i.test(parentClass)) continue; + const text = el.text().trim(); - price = parsePrice(text); - if (price) break; + const parsed = parsePrice(text); + + // Validate the price is reasonable (not a $1 coupon) + if (parsed && parsed.price >= 2) { + price = parsed; + break; + } + } + + if (price) break; + } + + // Fallback: try other known price selectors + if (!price) { + const fallbackSelectors = [ + '#priceblock_dealprice', + '#priceblock_saleprice', + '#priceblock_ourprice', + '#price_inside_buybox', + '#newBuyBoxPrice', + 'span[data-a-color="price"] .a-offscreen', + ]; 
+ + for (const selector of fallbackSelectors) { + const el = $(selector).first(); + if (el.length && !isInCouponContainer(el)) { + const text = el.text().trim(); + const parsed = parsePrice(text); + if (parsed && parsed.price >= 2) { + price = parsed; + break; + } + } + } + } + + // Last resort: look for the whole/fraction price format + if (!price) { + const whole = $('#corePrice_feature_div .a-price-whole').first().text().replace(',', ''); + const fraction = $('#corePrice_feature_div .a-price-fraction').first().text(); + if (whole) { + const priceStr = `$${whole}${fraction ? '.' + fraction : ''}`; + const parsed = parsePrice(priceStr); + if (parsed && parsed.price >= 2) { + price = parsed; + } } } diff --git a/backend/src/utils/priceParser.ts b/backend/src/utils/priceParser.ts index 89268c9..97a767f 100644 --- a/backend/src/utils/priceParser.ts +++ b/backend/src/utils/priceParser.ts @@ -107,15 +107,23 @@ export function findMostLikelyPrice(prices: ParsedPrice[]): ParsedPrice | null { if (prices.length === 0) return null; if (prices.length === 1) return prices[0]; - // Filter out very small prices (likely not product prices) - const validPrices = prices.filter((p) => p.price >= 0.99); + // Filter out very small prices (likely coupons, savings amounts, not actual product prices) + // Most real products cost at least $2-3, and coupon amounts are often $1-5 + const validPrices = prices.filter((p) => p.price >= 5); - if (validPrices.length === 0) return prices[0]; + // If no prices above $5, try with a lower threshold but above typical coupon amounts + if (validPrices.length === 0) { + const lowThresholdPrices = prices.filter((p) => p.price >= 2); + if (lowThresholdPrices.length > 0) { + lowThresholdPrices.sort((a, b) => a.price - b.price); + return lowThresholdPrices[0]; + } + // Fall back to original list if nothing matches + return prices[0]; + } - // Sort by price and pick the middle one (often the actual price) - // This helps avoid picking shipping costs or 
discounts + // Sort by price - the lowest valid price is often the sale/current price validPrices.sort((a, b) => a.price - b.price); - // Return the first (lowest) valid price - often the current/sale price return validPrices[0]; }