Add Magento 2 site scraper for better price extraction

- Added dedicated Magento 2 scraper that targets data-price-amount
- Looks for price in correct context (price-box, special-price, finalPrice)
- Properly detects CHF, EUR, GBP, USD currencies
- Improved generic currency detection with multiple text sources
- Fixes CHF price extraction for Degussa gold shop and other Magento sites

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-23 13:43:35 -05:00
parent 5a682276b6
commit e18be5bbc7

View file

@ -681,6 +681,75 @@ const siteScrapers: SiteScraper[] = [
},
},
// Magento 2 (generic - covers many sites including Degussa)
{
match: (url) => {
// Match common Magento indicators in URL or just try for any .html product page
return /\.(html|htm)$/i.test(url) || /\/catalog\/product\//i.test(url);
},
scrape: ($) => {
let price: ParsedPrice | null = null;
let name: string | null = null;
let imageUrl: string | null = null;
// Magento 2 stores prices in data-price-amount attribute
// Look for the final/special price first, then regular price
const priceSelectors = [
'.price-box .special-price [data-price-amount]',
'.price-box .price-final_price [data-price-amount]',
'.price-box [data-price-type="finalPrice"] [data-price-amount]',
'.price-box [data-price-amount]',
'[data-price-amount]',
];
for (const selector of priceSelectors) {
const el = $(selector).first();
if (el.length) {
const priceAmount = el.attr('data-price-amount');
if (priceAmount) {
const priceValue = parseFloat(priceAmount);
if (!isNaN(priceValue) && priceValue > 0) {
// Detect currency from the page
let currency = 'USD';
const priceText = el.closest('.price-box').text() || el.parent().text() || '';
const currencyMatch = priceText.match(/\b(CHF|EUR|GBP|USD|CAD|AUD)\b/i);
if (currencyMatch) {
currency = currencyMatch[1].toUpperCase();
} else {
// Check for currency symbols
const symbolMatch = priceText.match(/([$€£])/);
if (symbolMatch) {
currency = symbolMatch[1] === '€' ? 'EUR' : symbolMatch[1] === '£' ? 'GBP' : 'USD';
}
}
price = { price: priceValue, currency };
break;
}
}
}
}
// Get product name
name = $('h1.page-title span').text().trim() ||
$('h1.product-name').text().trim() ||
$('.product-info-main h1').text().trim() ||
$('[data-ui-id="page-title-wrapper"]').text().trim() ||
null;
// Get product image
imageUrl = $('[data-gallery-role="gallery"] img').first().attr('src') ||
$('.product.media img').first().attr('src') ||
$('.fotorama__stage img').first().attr('src') ||
null;
// Only return if we found a price (indicates it's likely a Magento site)
if (price) {
return { name, price, imageUrl };
}
return {};
},
},
];
// Generic selectors as fallback
@ -1088,11 +1157,35 @@ function extractGenericPrice($: CheerioAPI): ParsedPrice | null {
if (priceAmount) {
const price = parseFloat(priceAmount);
if (!isNaN(price) && price > 0) {
// Try to detect currency from nearby elements or default to USD
const currencyEl = $el.closest('.price-box').find('[data-price-type]').first();
const priceText = currencyEl.length ? currencyEl.text() : text;
const currencyMatch = priceText.match(/([A-Z]{3}|[$€£¥₹])/);
const currency = currencyMatch ? (currencyMatch[1] === '$' ? 'USD' : currencyMatch[1] === '€' ? 'EUR' : currencyMatch[1] === '£' ? 'GBP' : currencyMatch[1]) : 'USD';
// Try to detect currency from nearby elements, parent, or page
let currency = 'USD';
// Look for currency in the element's text, parent, and price-box container
const textSources = [
text,
$el.parent().text(),
$el.closest('.price-box').text(),
$el.closest('.price-wrapper').text(),
$el.closest('[class*="price"]').text(),
];
for (const source of textSources) {
if (!source) continue;
// Look for known currency codes first (more specific)
const currencyCodeMatch = source.match(/\b(CHF|EUR|GBP|USD|CAD|AUD|JPY|INR)\b/i);
if (currencyCodeMatch) {
currency = currencyCodeMatch[1].toUpperCase();
break;
}
// Then try currency symbols
const symbolMatch = source.match(/([$€£¥₹])/);
if (symbolMatch) {
const symbolMap: Record<string, string> = { '$': 'USD', '€': 'EUR', '£': 'GBP', '¥': 'JPY', '₹': 'INR' };
currency = symbolMap[symbolMatch[1]] || 'USD';
break;
}
}
prices.push({ price, currency });
return;
}