diff --git a/CHANGELOG.md b/CHANGELOG.md index b38409d..43c87c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ All notable changes to PriceGhost will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.4] - 2026-01-24 + +### Added + +- **Multi-Strategy Price Voting System** - More robust price extraction using multiple methods + - Runs all extraction methods (JSON-LD, site-specific, generic CSS, AI) in parallel + - Uses consensus voting to select the correct price when methods agree + - AI arbitration when extraction methods disagree + - User price selection dialog when price is ambiguous (multiple prices found) + - Remembers the winning extraction method for future checks of the same product +- **Price Selection Modal** - When multiple prices are found for a product, users can now select the correct one + - Shows all price candidates with confidence levels + - Displays extraction method and context for each candidate + - Sorted by confidence (highest first) + +### Changed + +- **Improved scheduler** - Now uses preferred extraction method when available for faster, more accurate re-checks + +--- + ## [1.0.3] - 2026-01-24 ### Added @@ -141,6 +162,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Version | Date | Description | |---------|------|-------------| +| 1.0.4 | 2026-01-24 | Multi-strategy price voting system with user selection for ambiguous prices | | 1.0.3 | 2026-01-24 | Notification history with bell icon, clear button, and full history page | | 1.0.2 | 2026-01-23 | Fixed stock status false positives for in-stock items | | 1.0.1 | 2026-01-23 | Bug fixes, JS-rendered price support, pre-order detection | diff --git a/backend/src/index.ts b/backend/src/index.ts index b824baf..a755533 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -151,6 +151,22 @@ async function runMigrations() { END $$; `); + // Add multi-strategy voting columns to products table + await client.query(` + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'preferred_extraction_method') THEN + ALTER TABLE products ADD COLUMN preferred_extraction_method VARCHAR(20); + END IF; + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'needs_price_review') THEN + ALTER TABLE products ADD COLUMN needs_price_review BOOLEAN DEFAULT false; + END IF; + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'price_candidates') THEN + ALTER TABLE products ADD COLUMN price_candidates JSONB; + END IF; + END $$; + `); + // Create notification_history table for tracking all triggered notifications await client.query(` CREATE TABLE IF NOT EXISTS notification_history ( diff --git a/backend/src/models/index.ts b/backend/src/models/index.ts index 6064ffd..c8ca614 100644 --- a/backend/src/models/index.ts +++ b/backend/src/models/index.ts @@ -551,6 +551,21 @@ export const productQueries = { ); return result.rows; }, + + updateExtractionMethod: async (id: number, method: string): Promise => { + await pool.query( + 'UPDATE products SET preferred_extraction_method = $1, needs_price_review = false WHERE id = $2', + [method, id] + ); + }, + + getPreferredExtractionMethod: async (id: number): Promise => { + const result = await pool.query( + 'SELECT preferred_extraction_method FROM products WHERE id = $1', + [id] + ); + return result.rows[0]?.preferred_extraction_method || null; + }, }; // Price History types and queries diff --git a/backend/src/routes/products.ts b/backend/src/routes/products.ts index c87af38..fac5724 100644 --- a/backend/src/routes/products.ts +++ b/backend/src/routes/products.ts @@ -1,7 +1,7 @@ import { Router, Response } from 'express'; import { AuthRequest, authMiddleware } from '../middleware/auth'; import { productQueries, priceHistoryQueries, stockStatusHistoryQueries } from '../models'; -import { scrapeProduct } from '../services/scraper'; +import { scrapeProduct, scrapeProductWithVoting, ExtractionMethod } from '../services/scraper'; const router = Router(); @@ -20,11 +20,11 @@ router.get('/', async (req: AuthRequest, res: Response) => { } }); -// Add a new product to track +// Add a new product to track (with multi-strategy voting) router.post('/', async (req: AuthRequest, res: Response) => { try { const userId = req.userId!; - const { url, refresh_interval } = req.body; + const { url, refresh_interval, selectedPrice, selectedMethod } = req.body; if (!url) { res.status(400).json({ error: 'URL is required' }); @@ -39,8 +39,47 @@ router.post('/', async (req: AuthRequest, res: Response) => { return; } - // Scrape product info (pass userId for AI fallback) - const scrapedData = await scrapeProduct(url, userId); + // If user is confirming a price selection, use the old scraper with their choice + if (selectedPrice !== undefined && selectedMethod) { + // User has selected a price from candidates - use it directly + const scrapedData = await scrapeProduct(url, userId); + + // Create product with the user-selected price + const product = await productQueries.create( + userId, + url, + scrapedData.name, + scrapedData.imageUrl, + refresh_interval || 3600, + scrapedData.stockStatus + ); + + // Store the preferred extraction method and the user-selected price + await productQueries.updateExtractionMethod(product.id, selectedMethod); + + // Record the user-selected price + await priceHistoryQueries.create( + product.id, + selectedPrice, + 'USD', // TODO: Get currency from selection + null + ); + + // Record initial stock status + if (scrapedData.stockStatus !== 'unknown') { + await stockStatusHistoryQueries.recordChange(product.id, scrapedData.stockStatus); + } + + // Update last_checked timestamp + await productQueries.updateLastChecked(product.id, product.refresh_interval); + + const productWithPrice = await productQueries.findById(product.id, userId); + res.status(201).json(productWithPrice); + return; + } + + // Use multi-strategy voting scraper + const scrapedData = await scrapeProductWithVoting(url, userId); // Allow adding out-of-stock products, but require a price for in-stock ones if (!scrapedData.price && scrapedData.stockStatus !== 'out_of_stock') { @@ -50,6 +89,26 @@ router.post('/', async (req: AuthRequest, res: Response) => { return; } + // If needsReview is true and there are multiple candidates, return them for user selection + if (scrapedData.needsReview && scrapedData.priceCandidates.length > 1) { + res.status(200).json({ + needsReview: true, + name: scrapedData.name, + imageUrl: scrapedData.imageUrl, + stockStatus: scrapedData.stockStatus, + priceCandidates: scrapedData.priceCandidates.map(c => ({ + price: c.price, + currency: c.currency, + method: c.method, + context: c.context, + confidence: c.confidence, + })), + suggestedPrice: scrapedData.price, + url, + }); + return; + } + // Create product with stock status const product = await productQueries.create( userId, @@ -60,6 +119,11 @@ router.post('/', async (req: AuthRequest, res: Response) => { scrapedData.stockStatus ); + // Store the extraction method that worked + if (scrapedData.selectedMethod) { + await productQueries.updateExtractionMethod(product.id, scrapedData.selectedMethod); + } + // Record initial price if available if (scrapedData.price) { await priceHistoryQueries.create( diff --git a/backend/src/services/ai-extractor.ts b/backend/src/services/ai-extractor.ts index 61dfcbb..38e7660 100644 --- a/backend/src/services/ai-extractor.ts +++ b/backend/src/services/ai-extractor.ts @@ -4,7 +4,7 @@ import axios from 'axios'; import { load } from 'cheerio'; import { AISettings } from '../models'; import { ParsedPrice } from '../utils/priceParser'; -import { StockStatus } from './scraper'; +import { StockStatus, PriceCandidate } from './scraper'; export interface AIExtractionResult { name: string | null; @@ -548,3 +548,211 @@ export async function tryAIVerification( return null; } } + +// Arbitration prompt for when multiple extraction methods disagree +const ARBITRATION_PROMPT = `You are a price arbitration assistant. Multiple price extraction methods found different prices for the same product. Help determine the correct price. + +Found prices: +$CANDIDATES$ + +Analyze the HTML content below and determine which price is the correct CURRENT selling price for the main product. + +Consider: +- JSON-LD structured data is usually highly reliable (schema.org standard) +- Site-specific extractors are well-tested for major retailers +- Generic CSS selectors might catch wrong prices (shipping, savings, bundles, etc.) +- Look for the price that appears in the main product display area +- Ignore crossed-out/original prices, shipping costs, subscription prices, or bundle prices + +Return a JSON object with: +- selectedIndex: the 0-based index of the correct price from the list above +- confidence: your confidence from 0 to 1 +- reason: brief explanation of why this price is correct + +Only return valid JSON, no explanation text outside the JSON. + +HTML Content: +`; + +export interface AIArbitrationResult { + selectedPrice: PriceCandidate | null; + confidence: number; + reason: string; +} + +async function arbitrateWithAnthropic( + html: string, + candidates: PriceCandidate[], + apiKey: string +): Promise { + const anthropic = new Anthropic({ apiKey }); + + const candidatesList = candidates.map((c, i) => + `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})` + ).join('\n'); + + const preparedHtml = prepareHtmlForAI(html); + const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', candidatesList) + preparedHtml; + + const response = await anthropic.messages.create({ + model: 'claude-3-haiku-20240307', + max_tokens: 512, + messages: [{ role: 'user', content: prompt }], + }); + + const content = response.content[0]; + if (content.type !== 'text') { + throw new Error('Unexpected response type from Anthropic'); + } + + return parseArbitrationResponse(content.text, candidates); +} + +async function arbitrateWithOpenAI( + html: string, + candidates: PriceCandidate[], + apiKey: string +): Promise { + const openai = new OpenAI({ apiKey }); + + const candidatesList = candidates.map((c, i) => + `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})` + ).join('\n'); + + const preparedHtml = prepareHtmlForAI(html); + const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', candidatesList) + preparedHtml; + + const response = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + max_tokens: 512, + messages: [{ role: 'user', content: prompt }], + }); + + const content = response.choices[0]?.message?.content; + if (!content) { + throw new Error('No response from OpenAI'); + } + + return parseArbitrationResponse(content, candidates); +} + +async function arbitrateWithOllama( + html: string, + candidates: PriceCandidate[], + baseUrl: string, + model: string +): Promise { + const candidatesList = candidates.map((c, i) => + `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})` + ).join('\n'); + + const preparedHtml = prepareHtmlForAI(html); + const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', candidatesList) + preparedHtml; + + const response = await axios.post( + `${baseUrl}/api/chat`, + { + model: model, + messages: [{ role: 'user', content: prompt }], + stream: false, + }, + { + headers: { 'Content-Type': 'application/json' }, + timeout: 120000, + } + ); + + const content = response.data?.message?.content; + if (!content) { + throw new Error('No response from Ollama'); + } + + return parseArbitrationResponse(content, candidates); +} + +function parseArbitrationResponse( + responseText: string, + candidates: PriceCandidate[] +): AIArbitrationResult { + console.log(`[AI Arbitrate] Raw response: ${responseText.substring(0, 500)}...`); + + const defaultResult: AIArbitrationResult = { + selectedPrice: null, + confidence: 0, + reason: 'Could not parse AI response', + }; + + let jsonStr = responseText.trim(); + + // Handle markdown code blocks + const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/); + if (jsonMatch) { + jsonStr = jsonMatch[1].trim(); + } + + // Try to find JSON object + const objectMatch = jsonStr.match(/\{[\s\S]*\}/); + if (objectMatch) { + jsonStr = objectMatch[0]; + } + + try { + const data = JSON.parse(jsonStr); + console.log(`[AI Arbitrate] Parsed:`, JSON.stringify(data, null, 2)); + + const selectedIndex = data.selectedIndex; + if (typeof selectedIndex === 'number' && selectedIndex >= 0 && selectedIndex < candidates.length) { + return { + selectedPrice: candidates[selectedIndex], + confidence: data.confidence ?? 0.7, + reason: data.reason || 'AI selected this price', + }; + } + + return defaultResult; + } catch (error) { + console.error('[AI Arbitrate] Failed to parse response:', responseText); + return defaultResult; + } +} + +// Export for use in voting scraper to arbitrate between disagreeing methods +export async function tryAIArbitration( + url: string, + html: string, + candidates: PriceCandidate[], + userId: number +): Promise { + try { + const { userQueries } = await import('../models'); + const settings = await userQueries.getAISettings(userId); + + // Need AI enabled for arbitration + if (!settings?.ai_enabled && !settings?.ai_verification_enabled) { + return null; + } + + // Need at least 2 candidates to arbitrate + if (candidates.length < 2) { + return null; + } + + // Use the configured provider + if (settings.ai_provider === 'anthropic' && settings.anthropic_api_key) { + console.log(`[AI Arbitrate] Using Anthropic to arbitrate ${candidates.length} prices for ${url}`); + return await arbitrateWithAnthropic(html, candidates, settings.anthropic_api_key); + } else if (settings.ai_provider === 'openai' && settings.openai_api_key) { + console.log(`[AI Arbitrate] Using OpenAI to arbitrate ${candidates.length} prices for ${url}`); + return await arbitrateWithOpenAI(html, candidates, settings.openai_api_key); + } else if (settings.ai_provider === 'ollama' && settings.ollama_base_url && settings.ollama_model) { + console.log(`[AI Arbitrate] Using Ollama to arbitrate ${candidates.length} prices for ${url}`); + return await arbitrateWithOllama(html, candidates, settings.ollama_base_url, settings.ollama_model); + } + + console.log(`[AI Arbitrate] No provider configured`); + return null; + } catch (error) { + console.error(`[AI Arbitrate] Arbitration failed for ${url}:`, error); + return null; + } +} diff --git a/backend/src/services/scheduler.ts b/backend/src/services/scheduler.ts index 42adffb..d74a812 100644 --- a/backend/src/services/scheduler.ts +++ b/backend/src/services/scheduler.ts @@ -1,6 +1,6 @@ import cron from 'node-cron'; import { productQueries, priceHistoryQueries, userQueries, stockStatusHistoryQueries, notificationHistoryQueries, NotificationType } from '../models'; -import { scrapeProduct } from './scraper'; +import { scrapeProduct, scrapeProductWithVoting, ExtractionMethod } from './scraper'; import { sendNotifications, NotificationPayload } from './notifications'; let isRunning = false; @@ -23,7 +23,15 @@ async function checkPrices(): Promise { try { console.log(`Checking price for product ${product.id}: ${product.url}`); - const scrapedData = await scrapeProduct(product.url, product.user_id); + // Get preferred extraction method for this product (if user previously selected one) + const preferredMethod = await productQueries.getPreferredExtractionMethod(product.id); + + // Use voting scraper with preferred method if available + const scrapedData = await scrapeProductWithVoting( + product.url, + product.user_id, + preferredMethod as ExtractionMethod | undefined + ); // Check for back-in-stock notification const wasOutOfStock = product.stock_status === 'out_of_stock'; diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index b0f937a..1617883 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -13,6 +13,220 @@ puppeteer.use(StealthPlugin()); export type StockStatus = 'in_stock' | 'out_of_stock' | 'unknown'; +// Extraction method types for multi-strategy voting +export type ExtractionMethod = 'json-ld' | 'site-specific' | 'generic-css' | 'ai'; + +// Price candidate from a single extraction method +export interface PriceCandidate { + price: number; + currency: string; + method: ExtractionMethod; + context?: string; // Text around the price for user context + confidence: number; // 0-1 confidence score +} + +// Extended scrape result with candidates for voting +export interface ScrapedProductWithCandidates { + name: string | null; + price: ParsedPrice | null; + imageUrl: string | null; + url: string; + stockStatus: StockStatus; + aiStatus: 'verified' | 'corrected' | null; + priceCandidates: PriceCandidate[]; + needsReview: boolean; + selectedMethod?: ExtractionMethod; // Which method was used for final price +} + +// Check if two prices are "close enough" to be considered the same (within 5%) +function pricesMatch(price1: number, price2: number): boolean { + if (price1 === price2) return true; + const diff = Math.abs(price1 - price2); + const avg = (price1 + price2) / 2; + return (diff / avg) < 0.05; // Within 5% +} + +// Find consensus among price candidates +function findPriceConsensus(candidates: PriceCandidate[]): { price: PriceCandidate | null; hasConsensus: boolean; groups: PriceCandidate[][] } { + if (candidates.length === 0) return { price: null, hasConsensus: false, groups: [] }; + if (candidates.length === 1) return { price: candidates[0], hasConsensus: true, groups: [[candidates[0]]] }; + + // Group prices that match + const groups: PriceCandidate[][] = []; + for (const candidate of candidates) { + let foundGroup = false; + for (const group of groups) { + if (pricesMatch(candidate.price, group[0].price)) { + group.push(candidate); + foundGroup = true; + break; + } + } + if (!foundGroup) { + groups.push([candidate]); + } + } + + // Sort groups by size (most votes first), then by confidence + groups.sort((a, b) => { + if (b.length !== a.length) return b.length - a.length; + const avgConfA = a.reduce((sum, c) => sum + c.confidence, 0) / a.length; + const avgConfB = b.reduce((sum, c) => sum + c.confidence, 0) / b.length; + return avgConfB - avgConfA; + }); + + const largestGroup = groups[0]; + // Consensus if majority agrees (>= 50% of methods) OR if top group has significantly more votes + const hasConsensus = largestGroup.length >= Math.ceil(candidates.length / 2) || + (groups.length > 1 && largestGroup.length > groups[1].length); + + // Pick the highest confidence candidate from the winning group + const winner = largestGroup.sort((a, b) => b.confidence - a.confidence)[0]; + + return { price: winner, hasConsensus, groups }; +} + +// Extract price candidates from JSON-LD structured data +function extractJsonLdCandidates($: CheerioAPI): PriceCandidate[] { + const candidates: PriceCandidate[] = []; + try { + const scripts = $('script[type="application/ld+json"]'); + for (let i = 0; i < scripts.length; i++) { + const content = $(scripts[i]).html(); + if (!content) continue; + + const data = JSON.parse(content) as JsonLdProduct | JsonLdProduct[]; + const product = findProduct(data); + + if (product?.offers) { + const offer = Array.isArray(product.offers) ? product.offers[0] : product.offers; + const priceValue = offer.lowPrice || offer.price || offer.priceSpecification?.price; + const currency = offer.priceCurrency || offer.priceSpecification?.priceCurrency || 'USD'; + + if (priceValue) { + const price = parseFloat(String(priceValue)); + if (!isNaN(price) && price > 0) { + candidates.push({ + price, + currency, + method: 'json-ld', + context: `Structured data: ${product.name || 'Product'}`, + confidence: 0.9, // JSON-LD is highly reliable + }); + } + } + } + } + } catch (_e) { + // JSON parse error + } + return candidates; +} + +// Extract price candidates from site-specific scraper +function extractSiteSpecificCandidates($: CheerioAPI, url: string): { candidates: PriceCandidate[]; name: string | null; imageUrl: string | null; stockStatus: StockStatus } { + const candidates: PriceCandidate[] = []; + let name: string | null = null; + let imageUrl: string | null = null; + let stockStatus: StockStatus = 'unknown'; + + const siteScraper = siteScrapers.find((s) => s.match(url)); + if (siteScraper) { + const siteResult = siteScraper.scrape($, url); + if (siteResult.price) { + candidates.push({ + price: siteResult.price.price, + currency: siteResult.price.currency, + method: 'site-specific', + context: `Site-specific extractor for ${new URL(url).hostname}`, + confidence: 0.85, // Site-specific scrapers are well-tested + }); + } + name = siteResult.name || null; + imageUrl = siteResult.imageUrl || null; + stockStatus = siteResult.stockStatus || 'unknown'; + } + + return { candidates, name, imageUrl, stockStatus }; +} + +// Extract price candidates from generic CSS selectors +function extractGenericCssCandidates($: CheerioAPI): PriceCandidate[] { + const candidates: PriceCandidate[] = []; + const seen = new Set(); + + for (const selector of genericPriceSelectors) { + const elements = $(selector); + elements.each((_, el) => { + const $el = $(el); + // Skip if this looks like an "original" or "was" price + const classAttr = $el.attr('class') || ''; + const parentClass = $el.parent().attr('class') || ''; + if (/original|was|old|regular|compare|strikethrough|line-through/i.test(classAttr + parentClass)) { + return; + } + + // Check various attributes where price might be stored + const priceAmount = $el.attr('data-price-amount'); + const dataPrice = $el.attr('data-price'); + const content = $el.attr('content'); + const text = $el.text(); + + let parsed: ParsedPrice | null = null; + let context = selector; + + // Try data-price-amount first (Magento stores numeric value here) + if (priceAmount) { + const price = parseFloat(priceAmount); + if (!isNaN(price) && price > 0) { + let currency = 'USD'; + const textSources = [text, $el.parent().text(), $el.closest('.price-box').text()]; + for (const source of textSources) { + if (!source) continue; + const currencyCodeMatch = source.match(/\b(CHF|EUR|GBP|USD|CAD|AUD|JPY|INR)\b/i); + if (currencyCodeMatch) { + currency = currencyCodeMatch[1].toUpperCase(); + break; + } + const symbolMatch = source.match(/([$€£¥₹])/); + if (symbolMatch) { + const symbolMap: Record = { '$': 'USD', '€': 'EUR', '£': 'GBP', '¥': 'JPY', '₹': 'INR' }; + currency = symbolMap[symbolMatch[1]] || 'USD'; + break; + } + } + parsed = { price, currency }; + context = `data-price-amount attribute`; + } + } + + if (!parsed) { + const priceStr = content || dataPrice || text; + parsed = parsePrice(priceStr); + if (parsed) { + context = text.trim().slice(0, 50); + } + } + + if (parsed && parsed.price > 0 && !seen.has(parsed.price)) { + seen.add(parsed.price); + candidates.push({ + price: parsed.price, + currency: parsed.currency, + method: 'generic-css', + context, + confidence: 0.6, // Generic CSS is less reliable + }); + } + }); + + // Only take first few generic candidates to avoid noise + if (candidates.length >= 3) break; + } + + return candidates; +} + // Browser-based scraping for sites that block HTTP requests (e.g., Cloudflare) async function scrapeWithBrowser(url: string): Promise { const browser = await puppeteer.launch({ @@ -1059,6 +1273,287 @@ export async function scrapeProduct(url: string, userId?: number): Promise { + const result: ScrapedProductWithCandidates = { + name: null, + price: null, + imageUrl: null, + url, + stockStatus: 'unknown', + aiStatus: null, + priceCandidates: [], + needsReview: false, + }; + + let html: string = ''; + + try { + let usedBrowser = false; + + // Fetch HTML + try { + const response = await axios.get(url, { + headers: { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.9', + 'Accept-Encoding': 'gzip, deflate, br', + 'Cache-Control': 'no-cache', + Pragma: 'no-cache', + 'Sec-Ch-Ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', + 'Sec-Ch-Ua-Mobile': '?0', + 'Sec-Ch-Ua-Platform': '"Windows"', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'none', + 'Sec-Fetch-User': '?1', + 'Upgrade-Insecure-Requests': '1', + }, + timeout: 20000, + maxRedirects: 5, + }); + html = response.data; + } catch (axiosError) { + if (axiosError instanceof AxiosError && axiosError.response?.status === 403) { + console.log(`[Voting] HTTP blocked (403) for ${url}, using browser...`); + html = await scrapeWithBrowser(url); + usedBrowser = true; + } else { + throw axiosError; + } + } + + let $ = load(html); + + // Collect candidates from all methods + const allCandidates: PriceCandidate[] = []; + + // 1. JSON-LD extraction (highest reliability) + const jsonLdCandidates = extractJsonLdCandidates($); + allCandidates.push(...jsonLdCandidates); + console.log(`[Voting] JSON-LD found ${jsonLdCandidates.length} candidates`); + + // 2. Site-specific extraction + const siteResult = extractSiteSpecificCandidates($, url); + allCandidates.push(...siteResult.candidates); + if (siteResult.name) result.name = siteResult.name; + if (siteResult.imageUrl) result.imageUrl = siteResult.imageUrl; + if (siteResult.stockStatus !== 'unknown') result.stockStatus = siteResult.stockStatus; + console.log(`[Voting] Site-specific found ${siteResult.candidates.length} candidates`); + + // 3. Generic CSS extraction + const genericCandidates = extractGenericCssCandidates($); + allCandidates.push(...genericCandidates); + console.log(`[Voting] Generic CSS found ${genericCandidates.length} candidates`); + + // If no candidates found in static HTML, try browser rendering + if (allCandidates.length === 0 && !usedBrowser) { + console.log(`[Voting] No candidates in static HTML, trying browser...`); + try { + html = await scrapeWithBrowser(url); + usedBrowser = true; + $ = load(html); + + // Re-run all extraction methods + allCandidates.push(...extractJsonLdCandidates($)); + const browserSiteResult = extractSiteSpecificCandidates($, url); + allCandidates.push(...browserSiteResult.candidates); + if (!result.name && browserSiteResult.name) result.name = browserSiteResult.name; + if (!result.imageUrl && browserSiteResult.imageUrl) result.imageUrl = browserSiteResult.imageUrl; + if (result.stockStatus === 'unknown' && browserSiteResult.stockStatus !== 'unknown') { + result.stockStatus = browserSiteResult.stockStatus; + } + allCandidates.push(...extractGenericCssCandidates($)); + console.log(`[Voting] Browser found ${allCandidates.length} total candidates`); + } catch (browserError) { + console.error(`[Voting] Browser fallback failed:`, browserError); + } + } + + // Fill in missing metadata + if (!result.name) { + result.name = extractGenericName($) || $('meta[property="og:title"]').attr('content') || null; + } + if (!result.imageUrl) { + result.imageUrl = extractGenericImage($, url) || $('meta[property="og:image"]').attr('content') || null; + } + if (result.stockStatus === 'unknown') { + result.stockStatus = extractGenericStockStatus($); + } + + // Store all candidates + result.priceCandidates = allCandidates; + + // If user has a preferred method, try to use it + if (preferredMethod && allCandidates.length > 0) { + const preferredCandidate = allCandidates.find(c => c.method === preferredMethod); + if (preferredCandidate) { + console.log(`[Voting] Using preferred method ${preferredMethod}: ${preferredCandidate.price}`); + result.price = { price: preferredCandidate.price, currency: preferredCandidate.currency }; + result.selectedMethod = preferredMethod; + return result; + } + } + + // Find consensus + const { price: consensusPrice, hasConsensus, groups } = findPriceConsensus(allCandidates); + console.log(`[Voting] Consensus: ${hasConsensus}, Groups: ${groups.length}, Winner: ${consensusPrice?.price}`); + + if (hasConsensus && consensusPrice) { + // Clear consensus - use the winning price + result.price = { price: consensusPrice.price, currency: consensusPrice.currency }; + result.selectedMethod = consensusPrice.method; + console.log(`[Voting] Consensus price: ${consensusPrice.price} via ${consensusPrice.method}`); + } else if (allCandidates.length > 0) { + // No consensus - try AI arbitration if available + if (userId && html) { + console.log(`[Voting] No consensus, trying AI arbitration...`); + try { + const { tryAIArbitration } = await import('./ai-extractor'); + const aiResult = await tryAIArbitration(url, html, allCandidates, userId); + + if (aiResult && aiResult.selectedPrice) { + console.log(`[Voting] AI selected price: ${aiResult.selectedPrice.price} (reason: ${aiResult.reason})`); + result.price = { price: aiResult.selectedPrice.price, currency: aiResult.selectedPrice.currency }; + result.selectedMethod = aiResult.selectedPrice.method; + result.aiStatus = 'verified'; + + // Add AI as a candidate for transparency + if (!allCandidates.find(c => c.method === 'ai')) { + result.priceCandidates.push({ + price: aiResult.selectedPrice.price, + currency: aiResult.selectedPrice.currency, + method: 'ai', + context: `AI arbitration: ${aiResult.reason}`, + confidence: aiResult.confidence || 0.8, + }); + } + } else { + // AI couldn't decide either - flag for user review + console.log(`[Voting] AI couldn't decide, flagging for user review`); + result.needsReview = true; + // Use the most confident candidate as default + const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0]; + result.price = { price: bestCandidate.price, currency: bestCandidate.currency }; + result.selectedMethod = bestCandidate.method; + } + } catch (aiError) { + console.error(`[Voting] AI arbitration failed:`, aiError); + // Fall back to flagging for user review + result.needsReview = true; + const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0]; + result.price = { price: bestCandidate.price, currency: bestCandidate.currency }; + result.selectedMethod = bestCandidate.method; + } + } else { + // No AI available - flag for user review if multiple prices differ significantly + if (groups.length > 1) { + result.needsReview = true; + console.log(`[Voting] Multiple price groups found, flagging for user review`); + } + // Use the most confident candidate as default + const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0]; + result.price = { price: bestCandidate.price, currency: bestCandidate.currency }; + result.selectedMethod = bestCandidate.method; + } + } else { + // No candidates at all - try pure AI extraction + if (userId && html) { + console.log(`[Voting] No candidates found, trying AI extraction...`); + try { + const { tryAIExtraction } = await import('./ai-extractor'); + const aiResult = await tryAIExtraction(url, html, userId); + + if (aiResult && aiResult.price && aiResult.confidence > 0.5) { + console.log(`[Voting] AI extracted price: ${aiResult.price.price}`); + result.price = aiResult.price; + result.selectedMethod = 'ai'; + result.priceCandidates.push({ + price: aiResult.price.price, + currency: aiResult.price.currency, + method: 'ai', + context: 'AI extraction (no other methods found price)', + confidence: aiResult.confidence, + }); + if (!result.name && aiResult.name) result.name = aiResult.name; + if (!result.imageUrl && aiResult.imageUrl) result.imageUrl = aiResult.imageUrl; + if (result.stockStatus === 'unknown' && aiResult.stockStatus !== 'unknown') { + result.stockStatus = aiResult.stockStatus; + } + } + } catch (aiError) { + console.error(`[Voting] AI extraction failed:`, aiError); + } + } + } + + // If we have a price but AI is available, verify it + if (result.price && userId && html && !result.aiStatus) { + try { + const { tryAIVerification } = await import('./ai-extractor'); + const verifyResult = await tryAIVerification( + url, + html, + result.price.price, + result.price.currency, + userId + ); + + if (verifyResult) { + if (verifyResult.isCorrect) { + result.aiStatus = 'verified'; + } else if (verifyResult.suggestedPrice && verifyResult.confidence > 0.7) { + // AI suggests a different price - this might indicate we need review + const existingCandidate = allCandidates.find(c => + pricesMatch(c.price, verifyResult.suggestedPrice!.price) + ); + if (existingCandidate) { + // AI agrees with one of our candidates - use that + result.price = verifyResult.suggestedPrice; + result.selectedMethod = existingCandidate.method; + result.aiStatus = 'corrected'; + } else if (!result.needsReview) { + // AI suggests a price we didn't find - flag for review + result.needsReview = true; + result.priceCandidates.push({ + price: verifyResult.suggestedPrice.price, + currency: verifyResult.suggestedPrice.currency, + method: 'ai', + context: `AI suggestion: ${verifyResult.reason}`, + confidence: verifyResult.confidence, + }); + } + } + + // Update stock status from AI + if (verifyResult.stockStatus && verifyResult.stockStatus !== 'unknown') { + if (result.stockStatus === 'unknown' || verifyResult.stockStatus === 'out_of_stock') { + result.stockStatus = verifyResult.stockStatus; + } + } + } + } catch (verifyError) { + console.error(`[Voting] AI verification failed:`, verifyError); + } + } + + } catch (error) { + console.error(`[Voting] Error scraping ${url}:`, error); + } + + return result; +} + interface JsonLdProduct { '@type'?: string; '@graph'?: JsonLdProduct[]; diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 9eb1db2..75c150e 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -83,6 +83,27 @@ export interface ProductWithStats extends Product { } | null; } +// Response when product needs price review +export interface PriceCandidate { + price: number; + currency: string; + method: string; + context?: string; + confidence: number; +} + +export interface PriceReviewResponse { + needsReview: true; + name: string | null; + imageUrl: string | null; + stockStatus: string; + priceCandidates: PriceCandidate[]; + suggestedPrice: { price: number; currency: string } | null; + url: string; +} + +export type CreateProductResponse = Product | PriceReviewResponse; + export interface PriceHistory { id: number; product_id: number; @@ -96,8 +117,13 @@ export const productsApi = { getById: (id: number) => api.get(`/products/${id}`), - create: (url: string, refreshInterval?: number) => - api.post('/products', { url, refresh_interval: refreshInterval }), + create: (url: string, refreshInterval?: number, selectedPrice?: number, selectedMethod?: string) => + api.post('/products', { + url, + refresh_interval: refreshInterval, + selectedPrice, + selectedMethod, + }), update: (id: number, data: { name?: string; diff --git a/frontend/src/components/PriceSelectionModal.tsx b/frontend/src/components/PriceSelectionModal.tsx new file mode 100644 index 0000000..04bbd9b --- /dev/null +++ b/frontend/src/components/PriceSelectionModal.tsx @@ -0,0 +1,337 @@ +import { useState } from 'react'; + +export interface PriceCandidate { + price: number; + currency: string; + method: string; + context?: string; + confidence: number; +} + +interface PriceSelectionModalProps { + isOpen: boolean; + onClose: () => void; + onSelect: (price: number, method: string) => void; + productName: string | null; + imageUrl: string | null; + candidates: PriceCandidate[]; + suggestedPrice: { price: number; currency: string } | null; + url: string; +} + +const METHOD_LABELS: Record = { + 'json-ld': 'Structured Data', + 'site-specific': 'Site Scraper', + 'generic-css': 'CSS Selector', + 'ai': 'AI Extraction', +}; + +const METHOD_DESCRIPTIONS: Record = { + 'json-ld': 'Found in page metadata (schema.org)', + 'site-specific': 'Extracted using site-specific rules', + 'generic-css': 'Found using general price selectors', + 'ai': 'Detected by AI analysis', +}; + +export default function PriceSelectionModal({ + isOpen, + onClose, + onSelect, + productName, + imageUrl, + candidates, + suggestedPrice, + url, +}: PriceSelectionModalProps) { + const [selectedIndex, setSelectedIndex] = useState( + suggestedPrice + ? candidates.findIndex(c => c.price === suggestedPrice.price) + : 0 + ); + const [isSubmitting, setIsSubmitting] = useState(false); + + if (!isOpen) return null; + + const handleSelect = async () => { + if (selectedIndex === null || selectedIndex < 0) return; + const selected = candidates[selectedIndex]; + setIsSubmitting(true); + try { + await onSelect(selected.price, selected.method); + } finally { + setIsSubmitting(false); + } + }; + + const formatPrice = (price: number, currency: string) => { + const symbol = currency === 'EUR' ? '€' : currency === 'GBP' ? '£' : currency === 'CHF' ? 'CHF ' : '$'; + return `${symbol}${price.toFixed(2)}`; + }; + + const getConfidenceLabel = (confidence: number) => { + if (confidence >= 0.8) return 'High'; + if (confidence >= 0.6) return 'Medium'; + return 'Low'; + }; + + const getConfidenceColor = (confidence: number) => { + if (confidence >= 0.8) return '#10b981'; + if (confidence >= 0.6) return '#f59e0b'; + return '#6b7280'; + }; + + // Sort candidates by confidence (highest first) + const sortedCandidates = [...candidates].sort((a, b) => b.confidence - a.confidence); + + return ( +
+ + +
+
+

Multiple Prices Found

+

+ We found different prices for this product. Please select the correct one. +

+
+ +
+ {imageUrl && ( + + )} +
+

{productName || 'Unknown Product'}

+

{url}

+
+
+ +
+
+ {sortedCandidates.map((candidate, index) => { + const originalIndex = candidates.indexOf(candidate); + return ( +
setSelectedIndex(originalIndex)} + > +
+ + + +
+
+ + {formatPrice(candidate.price, candidate.currency)} + + + {getConfidenceLabel(candidate.confidence)} confidence + +
+
+ {METHOD_LABELS[candidate.method] || candidate.method} +
+
+ {candidate.context || METHOD_DESCRIPTIONS[candidate.method] || 'No additional context'} +
+
+ ); + })} +
+
+ +
+ + +
+
+
+ ); +} diff --git a/frontend/src/pages/Dashboard.tsx b/frontend/src/pages/Dashboard.tsx index 5ddd390..dc9f04d 100644 --- a/frontend/src/pages/Dashboard.tsx +++ b/frontend/src/pages/Dashboard.tsx @@ -2,7 +2,13 @@ import { useState, useEffect, useMemo } from 'react'; import Layout from '../components/Layout'; import ProductCard from '../components/ProductCard'; import ProductForm from '../components/ProductForm'; -import { productsApi, pricesApi, Product } from '../api/client'; +import PriceSelectionModal from '../components/PriceSelectionModal'; +import { productsApi, pricesApi, Product, PriceReviewResponse } from '../api/client'; + +// Type guard to check if response needs review +function isPriceReviewResponse(response: Product | PriceReviewResponse): response is PriceReviewResponse { + return 'needsReview' in response && response.needsReview === true; +} type SortOption = 'date_added' | 'name' | 'price' | 'price_change' | 'website'; type SortOrder = 'asc' | 'desc'; @@ -33,6 +39,11 @@ export default function Dashboard() { const [isSavingBulk, setIsSavingBulk] = useState(false); const [showBulkActions, setShowBulkActions] = useState(false); + // Price selection modal state + const [showPriceModal, setShowPriceModal] = useState(false); + const [priceReviewData, setPriceReviewData] = useState(null); + const [pendingRefreshInterval, setPendingRefreshInterval] = useState(3600); + const fetchProducts = async () => { try { const response = await productsApi.getAll(); @@ -58,7 +69,40 @@ export default function Dashboard() { const handleAddProduct = async (url: string, refreshInterval: number) => { const response = await productsApi.create(url, refreshInterval); - setProducts((prev) => [response.data, ...prev]); + + // Check if we need user to select a price + if (isPriceReviewResponse(response.data)) { + setPriceReviewData(response.data); + setPendingRefreshInterval(refreshInterval); + setShowPriceModal(true); + return; // Don't add product yet - wait for user selection + } + + // response.data is a Product at this point + setProducts((prev) => [response.data as Product, ...prev]); + }; + + const handlePriceSelected = async (selectedPrice: number, selectedMethod: string) => { + if (!priceReviewData) return; + + const response = await productsApi.create( + priceReviewData.url, + pendingRefreshInterval, + selectedPrice, + selectedMethod + ); + + // When selecting a price, the API should always return a Product + if (!isPriceReviewResponse(response.data)) { + setProducts((prev) => [response.data as Product, ...prev]); + } + setShowPriceModal(false); + setPriceReviewData(null); + }; + + const handlePriceModalClose = () => { + setShowPriceModal(false); + setPriceReviewData(null); }; const handleDeleteProduct = async (id: number) => { @@ -641,6 +685,18 @@ export default function Dashboard() { + {/* Price Selection Modal */} + + {error &&
{error}
} {/* Dashboard Summary */}