Fix Amazon scraper picking up coupon prices instead of product price

- Add detection for coupon/savings containers and skip prices within them
- Check parent elements for coupon-related IDs, classes, and text
- Add minimum price threshold of $2 (coupons are typically $1-5)
- Add fallback to parse Amazon's whole/fraction price format directly
- Increase findMostLikelyPrice threshold from $0.99 to $5 (falling back to $2 when no price reaches $5)

This fixes the issue where $1 coupon savings were being scraped
instead of the actual $25.99 product price.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-20 20:46:17 -05:00
parent ba9e52b90f
commit bf111e13d8
2 changed files with 105 additions and 22 deletions

View file

@ -24,26 +24,101 @@ const siteScrapers: SiteScraper[] = [
{
match: (url) => /amazon\.(com|co\.uk|ca|de|fr|es|it|co\.jp|in|com\.au)/i.test(url),
scrape: ($) => {
// Price selectors in order of preference (sale price first)
const priceSelectors = [
'#corePrice_feature_div .a-price .a-offscreen',
'#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
'#priceblock_dealprice',
'#priceblock_saleprice',
'#priceblock_ourprice',
'.a-price .a-offscreen',
'#price_inside_buybox',
'#newBuyBoxPrice',
'span[data-a-color="price"] .a-offscreen',
/**
 * Reports whether `el` sits inside a coupon/savings container.
 *
 * Every ancestor of `el` is inspected, and the element counts as "inside a
 * coupon container" as soon as one ancestor satisfies either rule:
 *   1. its `id` + `class` attributes (concatenated) match a coupon/savings keyword, or
 *   2. its lower-cased text is shorter than 100 characters and mentions
 *      "save $", "coupon" or "clip".
 *
 * @param el - wrapped element to test (whatever `$` returns; presumably a
 *   Cheerio selection — confirm against the enclosing scraper).
 * @returns `true` when any ancestor looks like a coupon/savings container.
 */
const isInCouponContainer = (el: ReturnType<typeof $>) => {
  // Built once instead of once per ancestor.
  const couponAttrPattern = /coupon|savings|save\s*\$|clipcoupon|promoprice/i;
  const maxCouponTextLength = 100;

  // An ancestor's text contains the text of everything nested in it, so once
  // one ancestor reaches the length limit no outer ancestor can be "small".
  // Skipping text extraction from then on avoids re-serialising ever larger
  // subtrees (up to the whole document) for every price element.
  // NOTE(review): relies on `.parents()` yielding ancestors nearest-first.
  let textMayStillBeSmall = true;

  for (const parent of el.parents().toArray()) {
    const ancestor = $(parent);
    const id = ancestor.attr('id') || '';
    const className = ancestor.attr('class') || '';

    // Cheap attribute check first; it applies to every ancestor regardless of size.
    if (couponAttrPattern.test(id + className)) {
      return true;
    }

    if (!textMayStillBeSmall) continue;

    const text = ancestor.text().toLowerCase();
    if (text.length >= maxCouponTextLength) {
      // Too much text to be a coupon badge; outer ancestors will be at least as large.
      textMayStillBeSmall = false;
      continue;
    }

    // Small container whose text mentions a saving or coupon.
    if (text.includes('save $') || text.includes('coupon') || text.includes('clip')) {
      return true;
    }
  }
  return false;
};
// Try to get the main displayed price from specific containers first
// These are the primary price display areas on Amazon
const primaryPriceContainers = [
'#corePrice_feature_div',
'#corePriceDisplay_desktop_feature_div',
'#apex_desktop_newAccordionRow',
'#apex_offerDisplay_desktop',
];
let price: ParsedPrice | null = null;
for (const selector of priceSelectors) {
const el = $(selector).first();
if (el.length) {
// First, try the primary price containers
for (const containerId of primaryPriceContainers) {
const container = $(containerId);
if (!container.length) continue;
// Look for the main price display (not savings/coupons)
const priceElements = container.find('.a-price .a-offscreen');
for (let i = 0; i < priceElements.length; i++) {
const el = $(priceElements[i]);
// Skip if this is inside a coupon container
if (isInCouponContainer(el)) continue;
// Skip if the parent has "savings" or similar class
const parentClass = el.parent().attr('class') || '';
if (/savings|coupon|save/i.test(parentClass)) continue;
const text = el.text().trim();
price = parsePrice(text);
if (price) break;
const parsed = parsePrice(text);
// Validate the price is reasonable (not a $1 coupon)
if (parsed && parsed.price >= 2) {
price = parsed;
break;
}
}
if (price) break;
}
// Fallback: try other known price selectors
if (!price) {
const fallbackSelectors = [
'#priceblock_dealprice',
'#priceblock_saleprice',
'#priceblock_ourprice',
'#price_inside_buybox',
'#newBuyBoxPrice',
'span[data-a-color="price"] .a-offscreen',
];
for (const selector of fallbackSelectors) {
const el = $(selector).first();
if (el.length && !isInCouponContainer(el)) {
const text = el.text().trim();
const parsed = parsePrice(text);
if (parsed && parsed.price >= 2) {
price = parsed;
break;
}
}
}
}
// Last resort: look for the whole/fraction price format
if (!price) {
const whole = $('#corePrice_feature_div .a-price-whole').first().text().replace(',', '');
const fraction = $('#corePrice_feature_div .a-price-fraction').first().text();
if (whole) {
const priceStr = `$${whole}${fraction ? '.' + fraction : ''}`;
const parsed = parsePrice(priceStr);
if (parsed && parsed.price >= 2) {
price = parsed;
}
}
}

View file

@ -107,15 +107,23 @@ export function findMostLikelyPrice(prices: ParsedPrice[]): ParsedPrice | null {
if (prices.length === 0) return null;
if (prices.length === 1) return prices[0];
// Filter out very small prices (likely not product prices)
const validPrices = prices.filter((p) => p.price >= 0.99);
// Filter out very small prices (likely coupons, savings amounts, not actual product prices)
// Most real products cost at least $2-3, and coupon amounts are often $1-5
const validPrices = prices.filter((p) => p.price >= 5);
if (validPrices.length === 0) return prices[0];
// If no prices above $5, try with a lower threshold but above typical coupon amounts
if (validPrices.length === 0) {
const lowThresholdPrices = prices.filter((p) => p.price >= 2);
if (lowThresholdPrices.length > 0) {
lowThresholdPrices.sort((a, b) => a.price - b.price);
return lowThresholdPrices[0];
}
// Fall back to original list if nothing matches
return prices[0];
}
// Sort by price and pick the middle one (often the actual price)
// This helps avoid picking shipping costs or discounts
// Sort by price - the lowest valid price is often the sale/current price
validPrices.sort((a, b) => a.price - b.price);
// Return the first (lowest) valid price - often the current/sale price
return validPrices[0];
}