Fix Amazon scraper picking up coupon prices instead of product price

- Add detection for coupon/savings containers and skip prices within them
- Check parent elements for coupon-related IDs, classes, and text
- Add minimum price threshold of $2 (coupons are typically $1-5)
- Add fallback to parse Amazon's whole/fraction price format directly
- Increase findMostLikelyPrice threshold from $0.99 to $5, falling back to $2 when no price reaches $5

This fixes the issue where $1 coupon savings were being scraped
instead of the actual $25.99 product price.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-20 20:46:17 -05:00
parent ba9e52b90f
commit bf111e13d8
2 changed files with 105 additions and 22 deletions

View file

@@ -107,15 +107,23 @@ export function findMostLikelyPrice(prices: ParsedPrice[]): ParsedPrice | null {
if (prices.length === 0) return null;
if (prices.length === 1) return prices[0];
// Filter out very small prices (likely not product prices)
const validPrices = prices.filter((p) => p.price >= 0.99);
// Filter out very small prices (likely coupons, savings amounts, not actual product prices)
// Most real products cost at least $2-3, and coupon amounts are often $1-5
const validPrices = prices.filter((p) => p.price >= 5);
if (validPrices.length === 0) return prices[0];
// If no prices above $5, try with a lower threshold but above typical coupon amounts
if (validPrices.length === 0) {
const lowThresholdPrices = prices.filter((p) => p.price >= 2);
if (lowThresholdPrices.length > 0) {
lowThresholdPrices.sort((a, b) => a.price - b.price);
return lowThresholdPrices[0];
}
// Fall back to original list if nothing matches
return prices[0];
}
// Sort by price and pick the middle one (often the actual price)
// This helps avoid picking shipping costs or discounts
// Sort by price - the lowest valid price is often the sale/current price
validPrices.sort((a, b) => a.price - b.price);
// Return the first (lowest) valid price - often the current/sale price
return validPrices[0];
}