feat: Multi-strategy price voting system with user selection

- Add multi-strategy voting: runs the JSON-LD, site-specific, and generic CSS
  extractors on every scrape, with AI extraction as a fallback when none finds a price
- Implement consensus voting to select the correct price when methods agree
- Add AI arbitration when extraction methods disagree
- Add PriceSelectionModal for users to select correct price when ambiguous
- Store preferred extraction method per product for faster re-checks
- Add database columns for preferred_extraction_method, needs_price_review, and price_candidates

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-24 14:45:51 -05:00
parent 40c45b49c8
commit 4fd04cd160
10 changed files with 1259 additions and 12 deletions

View file

@ -5,6 +5,27 @@ All notable changes to PriceGhost will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.0.4] - 2026-01-24
### Added
- **Multi-Strategy Price Voting System** - More robust price extraction using multiple methods
- Runs the JSON-LD, site-specific, and generic CSS extractors on every check, falling back to AI extraction when none of them finds a price
- Uses consensus voting to select the correct price when methods agree
- AI arbitration when extraction methods disagree
- User price selection dialog when price is ambiguous (multiple prices found)
- Remembers the winning extraction method for future checks of the same product
- **Price Selection Modal** - When multiple prices are found for a product, users can now select the correct one
- Shows all price candidates with confidence levels
- Displays extraction method and context for each candidate
- Sorted by confidence (highest first)
### Changed
- **Improved scheduler** - Now uses preferred extraction method when available for faster, more accurate re-checks
---
## [1.0.3] - 2026-01-24
### Added
@ -141,6 +162,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| Version | Date | Description |
|---------|------|-------------|
| 1.0.4 | 2026-01-24 | Multi-strategy price voting system with user selection for ambiguous prices |
| 1.0.3 | 2026-01-24 | Notification history with bell icon, clear button, and full history page |
| 1.0.2 | 2026-01-23 | Fixed stock status false positives for in-stock items |
| 1.0.1 | 2026-01-23 | Bug fixes, JS-rendered price support, pre-order detection |

View file

@ -151,6 +151,22 @@ async function runMigrations() {
END $$;
`);
// Add multi-strategy voting columns to products table
await client.query(`
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'preferred_extraction_method') THEN
ALTER TABLE products ADD COLUMN preferred_extraction_method VARCHAR(20);
END IF;
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'needs_price_review') THEN
ALTER TABLE products ADD COLUMN needs_price_review BOOLEAN DEFAULT false;
END IF;
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'price_candidates') THEN
ALTER TABLE products ADD COLUMN price_candidates JSONB;
END IF;
END $$;
`);
// Create notification_history table for tracking all triggered notifications
await client.query(`
CREATE TABLE IF NOT EXISTS notification_history (

View file

@ -551,6 +551,21 @@ export const productQueries = {
);
return result.rows;
},
// Remember which extraction method produced the accepted price for a product.
// The same statement also resets needs_price_review to false.
updateExtractionMethod: async (id: number, method: string): Promise<void> => {
  const sql =
    'UPDATE products SET preferred_extraction_method = $1, needs_price_review = false WHERE id = $2';
  const params = [method, id];
  await pool.query(sql, params);
},
// Read the stored extraction method for a product.
// Resolves to null when no row matches the id or the column holds a falsy value.
getPreferredExtractionMethod: async (id: number): Promise<string | null> => {
  const { rows } = await pool.query(
    'SELECT preferred_extraction_method FROM products WHERE id = $1',
    [id]
  );
  const stored = rows[0]?.preferred_extraction_method;
  return stored ? stored : null;
},
};
// Price History types and queries

View file

@ -1,7 +1,7 @@
import { Router, Response } from 'express';
import { AuthRequest, authMiddleware } from '../middleware/auth';
import { productQueries, priceHistoryQueries, stockStatusHistoryQueries } from '../models';
import { scrapeProduct } from '../services/scraper';
import { scrapeProduct, scrapeProductWithVoting, ExtractionMethod } from '../services/scraper';
const router = Router();
@ -20,11 +20,11 @@ router.get('/', async (req: AuthRequest, res: Response) => {
}
});
// Add a new product to track
// Add a new product to track (with multi-strategy voting)
router.post('/', async (req: AuthRequest, res: Response) => {
try {
const userId = req.userId!;
const { url, refresh_interval } = req.body;
const { url, refresh_interval, selectedPrice, selectedMethod } = req.body;
if (!url) {
res.status(400).json({ error: 'URL is required' });
@ -39,8 +39,47 @@ router.post('/', async (req: AuthRequest, res: Response) => {
return;
}
// Scrape product info (pass userId for AI fallback)
const scrapedData = await scrapeProduct(url, userId);
// If user is confirming a price selection, use the old scraper with their choice
if (selectedPrice !== undefined && selectedMethod) {
// User has selected a price from candidates - use it directly
const scrapedData = await scrapeProduct(url, userId);
// Create product with the user-selected price
const product = await productQueries.create(
userId,
url,
scrapedData.name,
scrapedData.imageUrl,
refresh_interval || 3600,
scrapedData.stockStatus
);
// Store the preferred extraction method and the user-selected price
await productQueries.updateExtractionMethod(product.id, selectedMethod);
// Record the user-selected price
await priceHistoryQueries.create(
product.id,
selectedPrice,
'USD', // TODO: Get currency from selection
null
);
// Record initial stock status
if (scrapedData.stockStatus !== 'unknown') {
await stockStatusHistoryQueries.recordChange(product.id, scrapedData.stockStatus);
}
// Update last_checked timestamp
await productQueries.updateLastChecked(product.id, product.refresh_interval);
const productWithPrice = await productQueries.findById(product.id, userId);
res.status(201).json(productWithPrice);
return;
}
// Use multi-strategy voting scraper
const scrapedData = await scrapeProductWithVoting(url, userId);
// Allow adding out-of-stock products, but require a price for in-stock ones
if (!scrapedData.price && scrapedData.stockStatus !== 'out_of_stock') {
@ -50,6 +89,26 @@ router.post('/', async (req: AuthRequest, res: Response) => {
return;
}
// If needsReview is true and there are multiple candidates, return them for user selection
if (scrapedData.needsReview && scrapedData.priceCandidates.length > 1) {
res.status(200).json({
needsReview: true,
name: scrapedData.name,
imageUrl: scrapedData.imageUrl,
stockStatus: scrapedData.stockStatus,
priceCandidates: scrapedData.priceCandidates.map(c => ({
price: c.price,
currency: c.currency,
method: c.method,
context: c.context,
confidence: c.confidence,
})),
suggestedPrice: scrapedData.price,
url,
});
return;
}
// Create product with stock status
const product = await productQueries.create(
userId,
@ -60,6 +119,11 @@ router.post('/', async (req: AuthRequest, res: Response) => {
scrapedData.stockStatus
);
// Store the extraction method that worked
if (scrapedData.selectedMethod) {
await productQueries.updateExtractionMethod(product.id, scrapedData.selectedMethod);
}
// Record initial price if available
if (scrapedData.price) {
await priceHistoryQueries.create(

View file

@ -4,7 +4,7 @@ import axios from 'axios';
import { load } from 'cheerio';
import { AISettings } from '../models';
import { ParsedPrice } from '../utils/priceParser';
import { StockStatus } from './scraper';
import { StockStatus, PriceCandidate } from './scraper';
export interface AIExtractionResult {
name: string | null;
@ -548,3 +548,211 @@ export async function tryAIVerification(
return null;
}
}
// Arbitration prompt for when multiple extraction methods disagree.
// The $CANDIDATES$ placeholder is substituted with a numbered (0-based) list of
// candidates by the arbitrateWith* functions, and the prepared page HTML is
// appended after the trailing "HTML Content:" line. The model is asked to reply
// with JSON containing selectedIndex, confidence and reason.
const ARBITRATION_PROMPT = `You are a price arbitration assistant. Multiple price extraction methods found different prices for the same product. Help determine the correct price.
Found prices:
$CANDIDATES$
Analyze the HTML content below and determine which price is the correct CURRENT selling price for the main product.
Consider:
- JSON-LD structured data is usually highly reliable (schema.org standard)
- Site-specific extractors are well-tested for major retailers
- Generic CSS selectors might catch wrong prices (shipping, savings, bundles, etc.)
- Look for the price that appears in the main product display area
- Ignore crossed-out/original prices, shipping costs, subscription prices, or bundle prices
Return a JSON object with:
- selectedIndex: the 0-based index of the correct price from the list above
- confidence: your confidence from 0 to 1
- reason: brief explanation of why this price is correct
Only return valid JSON, no explanation text outside the JSON.
HTML Content:
`;
/**
 * Outcome of asking an AI provider to choose between conflicting price candidates.
 */
export interface AIArbitrationResult {
// The candidate the model picked; null when the reply could not be parsed
// or did not name a valid candidate index.
selectedPrice: PriceCandidate | null;
// Confidence attached to the pick (the prompt asks for a value from 0 to 1);
// 0 when no candidate was selected.
confidence: number;
// Explanation for the pick, or a fallback message when none is available.
reason: string;
}
/**
 * Ask Anthropic to pick the correct price among disagreeing candidates.
 *
 * @param html - Raw page HTML; passed through prepareHtmlForAI before being sent.
 * @param candidates - Candidates to choose from; listed to the model with 0-based indices.
 * @param apiKey - Anthropic API key.
 * @returns The parsed arbitration result (see parseArbitrationResponse).
 * @throws If the API call fails or the first content block is missing or not text.
 */
async function arbitrateWithAnthropic(
  html: string,
  candidates: PriceCandidate[],
  apiKey: string
): Promise<AIArbitrationResult> {
  const anthropic = new Anthropic({ apiKey });

  const candidatesList = candidates
    .map(
      (c, i) =>
        `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
    )
    .join('\n');

  const preparedHtml = prepareHtmlForAI(html);
  // Use a replacer function: candidate context is text scraped from the page, and a
  // replacement *string* would expand sequences such as "$$", "$&", "$`" or "$'".
  const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;

  const response = await anthropic.messages.create({
    model: 'claude-3-haiku-20240307',
    max_tokens: 512,
    messages: [{ role: 'user', content: prompt }],
  });

  // An empty content array would otherwise surface as an opaque TypeError.
  const content = response.content[0];
  if (!content || content.type !== 'text') {
    throw new Error('Unexpected response type from Anthropic');
  }

  return parseArbitrationResponse(content.text, candidates);
}
/**
 * Ask OpenAI to pick the correct price among disagreeing candidates.
 *
 * @param html - Raw page HTML; passed through prepareHtmlForAI before being sent.
 * @param candidates - Candidates to choose from; listed to the model with 0-based indices.
 * @param apiKey - OpenAI API key.
 * @returns The parsed arbitration result (see parseArbitrationResponse).
 * @throws If the API call fails or the first choice carries no message content.
 */
async function arbitrateWithOpenAI(
  html: string,
  candidates: PriceCandidate[],
  apiKey: string
): Promise<AIArbitrationResult> {
  const openai = new OpenAI({ apiKey });

  const candidatesList = candidates
    .map(
      (c, i) =>
        `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
    )
    .join('\n');

  const preparedHtml = prepareHtmlForAI(html);
  // Use a replacer function: candidate context is text scraped from the page, and a
  // replacement *string* would expand sequences such as "$$", "$&", "$`" or "$'".
  const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    max_tokens: 512,
    messages: [{ role: 'user', content: prompt }],
  });

  const content = response.choices[0]?.message?.content;
  if (!content) {
    throw new Error('No response from OpenAI');
  }

  return parseArbitrationResponse(content, candidates);
}
/**
 * Ask an Ollama server to pick the correct price among disagreeing candidates.
 *
 * @param html - Raw page HTML; passed through prepareHtmlForAI before being sent.
 * @param candidates - Candidates to choose from; listed to the model with 0-based indices.
 * @param baseUrl - Base URL of the Ollama server; "/api/chat" is appended to it.
 * @param model - Name of the Ollama model to query.
 * @returns The parsed arbitration result (see parseArbitrationResponse).
 * @throws If the HTTP request fails (120 s timeout) or the reply has no message content.
 */
async function arbitrateWithOllama(
  html: string,
  candidates: PriceCandidate[],
  baseUrl: string,
  model: string
): Promise<AIArbitrationResult> {
  const candidatesList = candidates
    .map(
      (c, i) =>
        `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
    )
    .join('\n');

  const preparedHtml = prepareHtmlForAI(html);
  // Use a replacer function: candidate context is text scraped from the page, and a
  // replacement *string* would expand sequences such as "$$", "$&", "$`" or "$'".
  const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;

  const response = await axios.post(
    `${baseUrl}/api/chat`,
    {
      model: model,
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    },
    {
      headers: { 'Content-Type': 'application/json' },
      timeout: 120000,
    }
  );

  const content = response.data?.message?.content;
  if (!content) {
    throw new Error('No response from Ollama');
  }

  return parseArbitrationResponse(content, candidates);
}
/**
 * Turn a model's free-text arbitration reply into an AIArbitrationResult.
 *
 * The reply may wrap its JSON in a markdown code fence or surround it with prose;
 * the first fenced block (if any) and then the outermost `{...}` span are extracted
 * before parsing.
 *
 * @param responseText - Raw text returned by the AI provider.
 * @param candidates - The candidates that were listed to the model, in the same order.
 * @returns The selected candidate with confidence and reason, or a result with
 *   `selectedPrice: null` and `confidence: 0` when the reply is not valid JSON or
 *   does not name an integer index inside `candidates`.
 */
function parseArbitrationResponse(
  responseText: string,
  candidates: PriceCandidate[]
): AIArbitrationResult {
  console.log(`[AI Arbitrate] Raw response: ${responseText.substring(0, 500)}...`);

  const defaultResult: AIArbitrationResult = {
    selectedPrice: null,
    confidence: 0,
    reason: 'Could not parse AI response',
  };

  let jsonStr = responseText.trim();

  // Handle markdown code blocks
  const fenced = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) {
    jsonStr = fenced[1].trim();
  }

  // Try to find JSON object
  const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
  if (objectMatch) {
    jsonStr = objectMatch[0];
  }

  let data: unknown;
  try {
    data = JSON.parse(jsonStr);
  } catch (error) {
    console.error('[AI Arbitrate] Failed to parse response:', responseText);
    return defaultResult;
  }
  console.log(`[AI Arbitrate] Parsed:`, JSON.stringify(data, null, 2));

  // The model's output is untrusted: validate every field before using it.
  if (typeof data !== 'object' || data === null) {
    return defaultResult;
  }
  const { selectedIndex, confidence, reason } = data as Record<string, unknown>;

  // Require an integer index; a fractional value such as 0.5 would pass a plain
  // range check and then index the array with a non-existent key.
  if (
    typeof selectedIndex !== 'number' ||
    !Number.isInteger(selectedIndex) ||
    selectedIndex < 0 ||
    selectedIndex >= candidates.length
  ) {
    return defaultResult;
  }

  return {
    selectedPrice: candidates[selectedIndex],
    // Default to 0.7 when confidence is absent or not numeric; clamp into [0, 1].
    confidence:
      typeof confidence === 'number' && Number.isFinite(confidence)
        ? Math.min(1, Math.max(0, confidence))
        : 0.7,
    reason: typeof reason === 'string' && reason !== '' ? reason : 'AI selected this price',
  };
}
/**
 * Entry point used by the voting scraper to let the user's configured AI provider
 * choose between disagreeing price candidates.
 *
 * Resolves to null when neither AI flag is enabled for the user, when fewer than two
 * candidates are supplied, when no provider is fully configured, or when anything
 * throws along the way (the error is logged, never propagated).
 */
export async function tryAIArbitration(
  url: string,
  html: string,
  candidates: PriceCandidate[],
  userId: number
): Promise<AIArbitrationResult | null> {
  try {
    const { userQueries } = await import('../models');
    const settings = await userQueries.getAISettings(userId);

    // Arbitration requires at least one of the AI features to be switched on
    if (!(settings?.ai_enabled || settings?.ai_verification_enabled)) {
      return null;
    }

    // Nothing to arbitrate with fewer than two candidates
    if (candidates.length < 2) {
      return null;
    }

    // Dispatch on the configured provider; a provider without its credentials
    // falls through to the "No provider configured" path below.
    switch (settings.ai_provider) {
      case 'anthropic':
        if (settings.anthropic_api_key) {
          console.log(`[AI Arbitrate] Using Anthropic to arbitrate ${candidates.length} prices for ${url}`);
          return await arbitrateWithAnthropic(html, candidates, settings.anthropic_api_key);
        }
        break;
      case 'openai':
        if (settings.openai_api_key) {
          console.log(`[AI Arbitrate] Using OpenAI to arbitrate ${candidates.length} prices for ${url}`);
          return await arbitrateWithOpenAI(html, candidates, settings.openai_api_key);
        }
        break;
      case 'ollama':
        if (settings.ollama_base_url && settings.ollama_model) {
          console.log(`[AI Arbitrate] Using Ollama to arbitrate ${candidates.length} prices for ${url}`);
          return await arbitrateWithOllama(html, candidates, settings.ollama_base_url, settings.ollama_model);
        }
        break;
    }

    console.log(`[AI Arbitrate] No provider configured`);
    return null;
  } catch (error) {
    console.error(`[AI Arbitrate] Arbitration failed for ${url}:`, error);
    return null;
  }
}

View file

@ -1,6 +1,6 @@
import cron from 'node-cron';
import { productQueries, priceHistoryQueries, userQueries, stockStatusHistoryQueries, notificationHistoryQueries, NotificationType } from '../models';
import { scrapeProduct } from './scraper';
import { scrapeProduct, scrapeProductWithVoting, ExtractionMethod } from './scraper';
import { sendNotifications, NotificationPayload } from './notifications';
let isRunning = false;
@ -23,7 +23,15 @@ async function checkPrices(): Promise<void> {
try {
console.log(`Checking price for product ${product.id}: ${product.url}`);
const scrapedData = await scrapeProduct(product.url, product.user_id);
// Get preferred extraction method for this product (if user previously selected one)
const preferredMethod = await productQueries.getPreferredExtractionMethod(product.id);
// Use voting scraper with preferred method if available
const scrapedData = await scrapeProductWithVoting(
product.url,
product.user_id,
preferredMethod as ExtractionMethod | undefined
);
// Check for back-in-stock notification
const wasOutOfStock = product.stock_status === 'out_of_stock';

View file

@ -13,6 +13,220 @@ puppeteer.use(StealthPlugin());
export type StockStatus = 'in_stock' | 'out_of_stock' | 'unknown';
// Extraction method types for multi-strategy voting.
// Each PriceCandidate is tagged with the method that produced it, and the winning
// method is reported as ScrapedProductWithCandidates.selectedMethod.
export type ExtractionMethod = 'json-ld' | 'site-specific' | 'generic-css' | 'ai';
// Price candidate from a single extraction method
export interface PriceCandidate {
// Numeric price value; the extractors in this file only emit values > 0
price: number;
// Currency code for the price; extractors fall back to 'USD' when none is detected
currency: string;
// Which extraction strategy produced this candidate
method: ExtractionMethod;
context?: string; // Text around the price for user context
confidence: number; // 0-1 confidence score
}
// Extended scrape result with candidates for voting
export interface ScrapedProductWithCandidates {
// Product name, or null when no extractor found one
name: string | null;
// Final chosen price, or null when no method produced a usable price
price: ParsedPrice | null;
// Product image URL, or null when none was found
imageUrl: string | null;
// The URL that was scraped
url: string;
stockStatus: StockStatus;
// 'verified' when AI confirmed or arbitrated the price, 'corrected' when AI
// verification switched to a different existing candidate, null otherwise
aiStatus: 'verified' | 'corrected' | null;
// Every candidate collected during the scrape (may include AI-added entries)
priceCandidates: PriceCandidate[];
// True when the scraper could not settle on a price and wants the user to choose
needsReview: boolean;
selectedMethod?: ExtractionMethod; // Which method was used for final price
}
/**
 * Decide whether two prices should be treated as the same price.
 *
 * Identical values always match. Otherwise the absolute difference is compared
 * against the mean of the two prices, and the pair matches when that ratio is
 * strictly below 5%.
 */
function pricesMatch(price1: number, price2: number): boolean {
  if (price1 === price2) {
    return true;
  }
  const gap = Math.abs(price1 - price2);
  const midpoint = (price1 + price2) / 2;
  const relativeGap = gap / midpoint;
  return relativeGap < 0.05;
}
/**
 * Group candidates whose prices match (see pricesMatch) and decide whether one
 * group clearly wins the vote.
 *
 * Groups are ordered by size, then by average confidence. There is consensus when
 * only one group exists or when the largest group has strictly more members than
 * the runner-up. An exact tie (for example two methods reporting two different
 * prices, or a 2-vs-2 split) is NOT consensus, so callers can escalate to
 * arbitration or user review.
 *
 * @param candidates - All price candidates collected for a page.
 * @returns `price`: the highest-confidence candidate of the leading group (null for
 *   empty input); `hasConsensus`: whether that group won outright; `groups`: the
 *   ordered groups.
 */
function findPriceConsensus(candidates: PriceCandidate[]): { price: PriceCandidate | null; hasConsensus: boolean; groups: PriceCandidate[][] } {
  if (candidates.length === 0) return { price: null, hasConsensus: false, groups: [] };
  if (candidates.length === 1) return { price: candidates[0], hasConsensus: true, groups: [[candidates[0]]] };

  // Group prices that match; each candidate joins the first group whose
  // representative (first member) it matches.
  const groups: PriceCandidate[][] = [];
  for (const candidate of candidates) {
    const group = groups.find((g) => pricesMatch(candidate.price, g[0].price));
    if (group) {
      group.push(candidate);
    } else {
      groups.push([candidate]);
    }
  }

  // Sort groups by size (most votes first), then by average confidence
  const averageConfidence = (group: PriceCandidate[]): number =>
    group.reduce((sum, c) => sum + c.confidence, 0) / group.length;
  groups.sort((a, b) => b.length - a.length || averageConfidence(b) - averageConfidence(a));

  const [largestGroup, runnerUp] = groups;

  // The leading group must strictly out-vote the runner-up. A ">= half" rule would
  // declare consensus on exact ties, hiding genuine disagreement between methods.
  const hasConsensus = runnerUp === undefined || largestGroup.length > runnerUp.length;

  // Pick the highest confidence candidate from the winning group (first one wins
  // ties) without reordering the group itself.
  const winner = largestGroup.reduce((best, c) => (c.confidence > best.confidence ? c : best));

  return { price: winner, hasConsensus, groups };
}
/**
 * Extract price candidates from JSON-LD structured data.
 *
 * Every `<script type="application/ld+json">` tag is parsed independently, so a
 * malformed tag is skipped without discarding candidates from the other tags.
 * For each product found, the first offer supplies the price, taken from
 * `lowPrice`, `price` or `priceSpecification.price` in that order.
 *
 * @param $ - Cheerio handle for the loaded page.
 * @returns One candidate per tag that yields a positive numeric price (confidence 0.9).
 */
function extractJsonLdCandidates($: CheerioAPI): PriceCandidate[] {
  const candidates: PriceCandidate[] = [];
  const scripts = $('script[type="application/ld+json"]');

  for (let i = 0; i < scripts.length; i++) {
    const content = $(scripts[i]).html();
    if (!content) continue;

    try {
      const data = JSON.parse(content) as JsonLdProduct | JsonLdProduct[];
      const product = findProduct(data);
      if (!product?.offers) continue;

      const offer = Array.isArray(product.offers) ? product.offers[0] : product.offers;
      // An empty offers array leaves nothing to read a price from.
      if (!offer) continue;

      const priceValue = offer.lowPrice || offer.price || offer.priceSpecification?.price;
      if (!priceValue) continue;

      const price = parseFloat(String(priceValue));
      if (isNaN(price) || price <= 0) continue;

      candidates.push({
        price,
        currency: offer.priceCurrency || offer.priceSpecification?.priceCurrency || 'USD',
        method: 'json-ld',
        context: `Structured data: ${product.name || 'Product'}`,
        confidence: 0.9, // JSON-LD is highly reliable
      });
    } catch (_e) {
      // Malformed JSON-LD in this tag - ignore it and keep scanning the remaining tags
    }
  }

  return candidates;
}
/**
 * Run the first site-specific scraper whose `match(url)` accepts the URL.
 *
 * Returns at most one price candidate (confidence 0.85) together with whatever name,
 * image URL and stock status that scraper reported. When no scraper matches, the
 * result is an empty candidate list with null metadata and 'unknown' stock status.
 */
function extractSiteSpecificCandidates($: CheerioAPI, url: string): { candidates: PriceCandidate[]; name: string | null; imageUrl: string | null; stockStatus: StockStatus } {
  const siteScraper = siteScrapers.find((s) => s.match(url));
  if (!siteScraper) {
    return { candidates: [], name: null, imageUrl: null, stockStatus: 'unknown' };
  }

  const siteResult = siteScraper.scrape($, url);
  const candidates: PriceCandidate[] = [];

  if (siteResult.price) {
    const { price, currency } = siteResult.price;
    candidates.push({
      price,
      currency,
      method: 'site-specific',
      context: `Site-specific extractor for ${new URL(url).hostname}`,
      confidence: 0.85, // Site-specific scrapers are well-tested
    });
  }

  return {
    candidates,
    name: siteResult.name || null,
    imageUrl: siteResult.imageUrl || null,
    stockStatus: siteResult.stockStatus || 'unknown',
  };
}
// Extract price candidates from generic CSS selectors.
// Walks genericPriceSelectors in order and, for each matched element, tries to read a
// price from data-price-amount, then from content / data-price / the element text.
// Each distinct numeric price is reported once (confidence 0.6).
function extractGenericCssCandidates($: CheerioAPI): PriceCandidate[] {
const candidates: PriceCandidate[] = [];
// Prices already emitted, so the same value found by several selectors is not repeated
const seen = new Set<number>();
for (const selector of genericPriceSelectors) {
const elements = $(selector);
elements.each((_, el) => {
const $el = $(el);
// Skip if this looks like an "original" or "was" price
// (substring match against the element's and its parent's class attributes)
const classAttr = $el.attr('class') || '';
const parentClass = $el.parent().attr('class') || '';
if (/original|was|old|regular|compare|strikethrough|line-through/i.test(classAttr + parentClass)) {
return;
}
// Check various attributes where price might be stored
const priceAmount = $el.attr('data-price-amount');
const dataPrice = $el.attr('data-price');
const content = $el.attr('content');
const text = $el.text();
let parsed: ParsedPrice | null = null;
// Defaults to the selector; replaced below with a more specific description
let context = selector;
// Try data-price-amount first (Magento stores numeric value here)
if (priceAmount) {
const price = parseFloat(priceAmount);
if (!isNaN(price) && price > 0) {
// The attribute carries no currency, so look for a code or symbol in nearby text,
// starting with the element itself and widening to its parent and .price-box
let currency = 'USD';
const textSources = [text, $el.parent().text(), $el.closest('.price-box').text()];
for (const source of textSources) {
if (!source) continue;
const currencyCodeMatch = source.match(/\b(CHF|EUR|GBP|USD|CAD|AUD|JPY|INR)\b/i);
if (currencyCodeMatch) {
currency = currencyCodeMatch[1].toUpperCase();
break;
}
const symbolMatch = source.match(/([$€£¥₹])/);
if (symbolMatch) {
const symbolMap: Record<string, string> = { '$': 'USD', '€': 'EUR', '£': 'GBP', '¥': 'JPY', '₹': 'INR' };
currency = symbolMap[symbolMatch[1]] || 'USD';
break;
}
}
parsed = { price, currency };
context = `data-price-amount attribute`;
}
}
// Fall back to parsing the first non-empty of content, data-price, or visible text
if (!parsed) {
const priceStr = content || dataPrice || text;
parsed = parsePrice(priceStr);
if (parsed) {
context = text.trim().slice(0, 50);
}
}
if (parsed && parsed.price > 0 && !seen.has(parsed.price)) {
seen.add(parsed.price);
candidates.push({
price: parsed.price,
currency: parsed.currency,
method: 'generic-css',
context,
confidence: 0.6, // Generic CSS is less reliable
});
}
});
// Only take first few generic candidates to avoid noise
// NOTE(review): this cap is checked once per selector, after all of its elements were
// processed, so a single selector matching many elements can still yield more than 3.
if (candidates.length >= 3) break;
}
return candidates;
}
// Browser-based scraping for sites that block HTTP requests (e.g., Cloudflare)
async function scrapeWithBrowser(url: string): Promise<string> {
const browser = await puppeteer.launch({
@ -1059,6 +1273,287 @@ export async function scrapeProduct(url: string, userId?: number): Promise<Scrap
return result;
}
/**
 * Multi-strategy voting scraper with user review support.
 * Runs all extraction methods, finds consensus, and flags ambiguous cases for user review.
 *
 * Flow:
 *  1. Fetch the page with axios; on HTTP 403 fetch it with scrapeWithBrowser instead.
 *  2. Collect candidates from JSON-LD, the site-specific scraper and generic CSS, in
 *     that order. If none are found and the browser was not used yet, re-run all
 *     three against browser-rendered HTML.
 *  3. Fill in missing name / image / stock status from the generic extractors.
 *  4. If `preferredMethod` is given and a candidate from that method exists, return
 *     immediately with it (consensus voting and all AI steps are skipped).
 *  5. Otherwise use the consensus winner; without consensus try AI arbitration
 *     (needs `userId`), falling back to the most confident candidate and setting
 *     `needsReview`.
 *  6. With no candidates at all, try pure AI extraction (accepted above 0.5 confidence).
 *  7. If a price was chosen and `aiStatus` is still unset, run AI verification.
 *
 * Errors are logged and swallowed: the function always resolves with whatever was
 * collected up to that point.
 *
 * @param url - Product page URL.
 * @param userId - Owner of the product; the AI steps only run when this is truthy.
 * @param preferredMethod - Extraction method to favour when it yields a candidate.
 */
export async function scrapeProductWithVoting(
url: string,
userId?: number,
preferredMethod?: ExtractionMethod
): Promise<ScrapedProductWithCandidates> {
const result: ScrapedProductWithCandidates = {
name: null,
price: null,
imageUrl: null,
url,
stockStatus: 'unknown',
aiStatus: null,
priceCandidates: [],
needsReview: false,
};
let html: string = '';
try {
// Tracks whether html already came from the browser, to avoid rendering twice
let usedBrowser = false;
// Fetch HTML
try {
const response = await axios.get<string>(url, {
headers: {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
Accept:
'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'no-cache',
Pragma: 'no-cache',
'Sec-Ch-Ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
},
timeout: 20000,
maxRedirects: 5,
});
html = response.data;
} catch (axiosError) {
// Only a 403 triggers the browser fallback; any other failure aborts the scrape
if (axiosError instanceof AxiosError && axiosError.response?.status === 403) {
console.log(`[Voting] HTTP blocked (403) for ${url}, using browser...`);
html = await scrapeWithBrowser(url);
usedBrowser = true;
} else {
throw axiosError;
}
}
let $ = load(html);
// Collect candidates from all methods
const allCandidates: PriceCandidate[] = [];
// 1. JSON-LD extraction (highest reliability)
const jsonLdCandidates = extractJsonLdCandidates($);
allCandidates.push(...jsonLdCandidates);
console.log(`[Voting] JSON-LD found ${jsonLdCandidates.length} candidates`);
// 2. Site-specific extraction
const siteResult = extractSiteSpecificCandidates($, url);
allCandidates.push(...siteResult.candidates);
if (siteResult.name) result.name = siteResult.name;
if (siteResult.imageUrl) result.imageUrl = siteResult.imageUrl;
if (siteResult.stockStatus !== 'unknown') result.stockStatus = siteResult.stockStatus;
console.log(`[Voting] Site-specific found ${siteResult.candidates.length} candidates`);
// 3. Generic CSS extraction
const genericCandidates = extractGenericCssCandidates($);
allCandidates.push(...genericCandidates);
console.log(`[Voting] Generic CSS found ${genericCandidates.length} candidates`);
// If no candidates found in static HTML, try browser rendering
if (allCandidates.length === 0 && !usedBrowser) {
console.log(`[Voting] No candidates in static HTML, trying browser...`);
try {
html = await scrapeWithBrowser(url);
usedBrowser = true;
$ = load(html);
// Re-run all extraction methods
allCandidates.push(...extractJsonLdCandidates($));
const browserSiteResult = extractSiteSpecificCandidates($, url);
allCandidates.push(...browserSiteResult.candidates);
if (!result.name && browserSiteResult.name) result.name = browserSiteResult.name;
if (!result.imageUrl && browserSiteResult.imageUrl) result.imageUrl = browserSiteResult.imageUrl;
if (result.stockStatus === 'unknown' && browserSiteResult.stockStatus !== 'unknown') {
result.stockStatus = browserSiteResult.stockStatus;
}
allCandidates.push(...extractGenericCssCandidates($));
console.log(`[Voting] Browser found ${allCandidates.length} total candidates`);
} catch (browserError) {
console.error(`[Voting] Browser fallback failed:`, browserError);
}
}
// Fill in missing metadata
if (!result.name) {
result.name = extractGenericName($) || $('meta[property="og:title"]').attr('content') || null;
}
if (!result.imageUrl) {
result.imageUrl = extractGenericImage($, url) || $('meta[property="og:image"]').attr('content') || null;
}
if (result.stockStatus === 'unknown') {
result.stockStatus = extractGenericStockStatus($);
}
// Store all candidates
// (same array object as allCandidates: later pushes to result.priceCandidates and
// in-place sorts of allCandidates are visible through both names)
result.priceCandidates = allCandidates;
// If user has a preferred method, try to use it
if (preferredMethod && allCandidates.length > 0) {
const preferredCandidate = allCandidates.find(c => c.method === preferredMethod);
if (preferredCandidate) {
console.log(`[Voting] Using preferred method ${preferredMethod}: ${preferredCandidate.price}`);
result.price = { price: preferredCandidate.price, currency: preferredCandidate.currency };
result.selectedMethod = preferredMethod;
// Early return: consensus voting, AI arbitration and AI verification are all skipped
return result;
}
}
// Find consensus
const { price: consensusPrice, hasConsensus, groups } = findPriceConsensus(allCandidates);
console.log(`[Voting] Consensus: ${hasConsensus}, Groups: ${groups.length}, Winner: ${consensusPrice?.price}`);
if (hasConsensus && consensusPrice) {
// Clear consensus - use the winning price
result.price = { price: consensusPrice.price, currency: consensusPrice.currency };
result.selectedMethod = consensusPrice.method;
console.log(`[Voting] Consensus price: ${consensusPrice.price} via ${consensusPrice.method}`);
} else if (allCandidates.length > 0) {
// No consensus - try AI arbitration if available
if (userId && html) {
console.log(`[Voting] No consensus, trying AI arbitration...`);
try {
const { tryAIArbitration } = await import('./ai-extractor');
const aiResult = await tryAIArbitration(url, html, allCandidates, userId);
if (aiResult && aiResult.selectedPrice) {
console.log(`[Voting] AI selected price: ${aiResult.selectedPrice.price} (reason: ${aiResult.reason})`);
result.price = { price: aiResult.selectedPrice.price, currency: aiResult.selectedPrice.currency };
result.selectedMethod = aiResult.selectedPrice.method;
// Setting aiStatus here also prevents the verification pass further down from running
result.aiStatus = 'verified';
// Add AI as a candidate for transparency
if (!allCandidates.find(c => c.method === 'ai')) {
result.priceCandidates.push({
price: aiResult.selectedPrice.price,
currency: aiResult.selectedPrice.currency,
method: 'ai',
context: `AI arbitration: ${aiResult.reason}`,
confidence: aiResult.confidence || 0.8,
});
}
} else {
// AI couldn't decide either - flag for user review
console.log(`[Voting] AI couldn't decide, flagging for user review`);
result.needsReview = true;
// Use the most confident candidate as default
const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0];
result.price = { price: bestCandidate.price, currency: bestCandidate.currency };
result.selectedMethod = bestCandidate.method;
}
} catch (aiError) {
console.error(`[Voting] AI arbitration failed:`, aiError);
// Fall back to flagging for user review
result.needsReview = true;
const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0];
result.price = { price: bestCandidate.price, currency: bestCandidate.currency };
result.selectedMethod = bestCandidate.method;
}
} else {
// No AI available - flag for user review if multiple prices differ significantly
if (groups.length > 1) {
result.needsReview = true;
console.log(`[Voting] Multiple price groups found, flagging for user review`);
}
// Use the most confident candidate as default
const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0];
result.price = { price: bestCandidate.price, currency: bestCandidate.currency };
result.selectedMethod = bestCandidate.method;
}
} else {
// No candidates at all - try pure AI extraction
if (userId && html) {
console.log(`[Voting] No candidates found, trying AI extraction...`);
try {
const { tryAIExtraction } = await import('./ai-extractor');
const aiResult = await tryAIExtraction(url, html, userId);
// Only accept an AI-extracted price above 0.5 confidence
if (aiResult && aiResult.price && aiResult.confidence > 0.5) {
console.log(`[Voting] AI extracted price: ${aiResult.price.price}`);
result.price = aiResult.price;
result.selectedMethod = 'ai';
result.priceCandidates.push({
price: aiResult.price.price,
currency: aiResult.price.currency,
method: 'ai',
context: 'AI extraction (no other methods found price)',
confidence: aiResult.confidence,
});
if (!result.name && aiResult.name) result.name = aiResult.name;
if (!result.imageUrl && aiResult.imageUrl) result.imageUrl = aiResult.imageUrl;
if (result.stockStatus === 'unknown' && aiResult.stockStatus !== 'unknown') {
result.stockStatus = aiResult.stockStatus;
}
}
} catch (aiError) {
console.error(`[Voting] AI extraction failed:`, aiError);
}
}
}
// If we have a price but AI is available, verify it
if (result.price && userId && html && !result.aiStatus) {
try {
const { tryAIVerification } = await import('./ai-extractor');
const verifyResult = await tryAIVerification(
url,
html,
result.price.price,
result.price.currency,
userId
);
if (verifyResult) {
if (verifyResult.isCorrect) {
result.aiStatus = 'verified';
} else if (verifyResult.suggestedPrice && verifyResult.confidence > 0.7) {
// AI suggests a different price - this might indicate we need review
const existingCandidate = allCandidates.find(c =>
pricesMatch(c.price, verifyResult.suggestedPrice!.price)
);
if (existingCandidate) {
// AI agrees with one of our candidates - use that
result.price = verifyResult.suggestedPrice;
result.selectedMethod = existingCandidate.method;
result.aiStatus = 'corrected';
} else if (!result.needsReview) {
// AI suggests a price we didn't find - flag for review
result.needsReview = true;
result.priceCandidates.push({
price: verifyResult.suggestedPrice.price,
currency: verifyResult.suggestedPrice.currency,
method: 'ai',
context: `AI suggestion: ${verifyResult.reason}`,
confidence: verifyResult.confidence,
});
}
}
// Update stock status from AI
// (AI only overrides a known status when it reports out_of_stock)
if (verifyResult.stockStatus && verifyResult.stockStatus !== 'unknown') {
if (result.stockStatus === 'unknown' || verifyResult.stockStatus === 'out_of_stock') {
result.stockStatus = verifyResult.stockStatus;
}
}
}
} catch (verifyError) {
console.error(`[Voting] AI verification failed:`, verifyError);
}
}
} catch (error) {
// Best effort: log and fall through to return whatever was gathered so far
console.error(`[Voting] Error scraping ${url}:`, error);
}
return result;
}
interface JsonLdProduct {
'@type'?: string;
'@graph'?: JsonLdProduct[];

View file

@ -83,6 +83,27 @@ export interface ProductWithStats extends Product {
} | null;
}
/**
 * A single price offered to the user when a product needs price review.
 *
 * Each candidate records the price that was found together with the
 * extraction method that produced it.
 */
export interface PriceCandidate {
  /** Identifier of the extraction method that produced this price. */
  method: string;
  /** Numeric price value. */
  price: number;
  /** Currency code accompanying the price. */
  currency: string;
  /** Confidence score for this candidate (presumably in the 0-1 range; confirm against the backend). */
  confidence: number;
  /** Optional free-text note describing where or how the price was found. */
  context?: string;
}
/**
 * Payload returned instead of a created product when the price is ambiguous
 * and the user has to choose among several candidates.
 *
 * The literal `needsReview: true` acts as the discriminant that separates
 * this shape from a regular product response.
 */
export interface PriceReviewResponse {
  /** Discriminant: always `true` for a review response. */
  needsReview: true;
  /** URL of the product page that was submitted. */
  url: string;
  /** Product name, or `null` when none is available. */
  name: string | null;
  /** Product image URL, or `null` when none is available. */
  imageUrl: string | null;
  /** Stock status string reported for the product. */
  stockStatus: string;
  /** Price suggested as the default choice, if any. */
  suggestedPrice: { price: number; currency: string } | null;
  /** All prices the user can choose from. */
  priceCandidates: PriceCandidate[];
}
/**
 * Result of a product-creation request: either the created product or a
 * request for the user to review the candidate prices first.
 */
export type CreateProductResponse =
  | Product
  | PriceReviewResponse;
export interface PriceHistory {
id: number;
product_id: number;
@ -96,8 +117,13 @@ export const productsApi = {
getById: (id: number) => api.get<ProductWithStats>(`/products/${id}`),
create: (url: string, refreshInterval?: number) =>
api.post<Product>('/products', { url, refresh_interval: refreshInterval }),
create: (url: string, refreshInterval?: number, selectedPrice?: number, selectedMethod?: string) =>
api.post<CreateProductResponse>('/products', {
url,
refresh_interval: refreshInterval,
selectedPrice,
selectedMethod,
}),
update: (id: number, data: {
name?: string;

View file

@ -0,0 +1,337 @@
import { useState } from 'react';
/**
 * One selectable price entry shown in the price selection modal.
 */
export interface PriceCandidate {
  /** Extraction method identifier; used to look up a display label and description. */
  method: string;
  /** Numeric price value. */
  price: number;
  /** Currency code used when formatting the price. */
  currency: string;
  /** Confidence score; the modal buckets it into High (>= 0.8), Medium (>= 0.6) or Low. */
  confidence: number;
  /** Optional explanatory text displayed under the method label. */
  context?: string;
}
/**
 * Props accepted by the price selection modal.
 */
interface PriceSelectionModalProps {
  /** Whether the modal is currently shown; when false the component renders nothing. */
  isOpen: boolean;
  /** Called when the user cancels the dialog. */
  onClose: () => void;
  /**
   * Called with the chosen price and its extraction method when the user
   * confirms. The modal awaits the result and shows a spinner meanwhile, so
   * an async handler (returning a promise) is explicitly allowed.
   */
  onSelect: (price: number, method: string) => void | Promise<void>;
  /** Product name to display, or `null` to show a placeholder. */
  productName: string | null;
  /** Product image to display, or `null` to omit the image. */
  imageUrl: string | null;
  /** Prices the user can choose from. */
  candidates: PriceCandidate[];
  /** Price to preselect when it matches one of the candidates. */
  suggestedPrice: { price: number; currency: string } | null;
  /** Product page URL shown under the product name. */
  url: string;
}
/**
 * Short human-readable label for each known extraction method id.
 * Method ids missing from this table are displayed verbatim by the modal.
 */
const METHOD_LABELS: Record<string, string> = {
  'json-ld': 'Structured Data',
  'site-specific': 'Site Scraper',
  'generic-css': 'CSS Selector',
  ai: 'AI Extraction',
};
/**
 * One-line explanation of each known extraction method id, used as fallback
 * text when a candidate carries no context of its own.
 */
const METHOD_DESCRIPTIONS: Record<string, string> = {
  'json-ld': 'Found in page metadata (schema.org)',
  'site-specific': 'Extracted using site-specific rules',
  'generic-css': 'Found using general price selectors',
  ai: 'Detected by AI analysis',
};
/**
 * Modal dialog asking the user to pick the correct price among several
 * candidates found for a product.
 *
 * Candidates are listed by confidence (highest first). The suggested price is
 * preselected when it matches a candidate; without a suggestion the first
 * candidate is preselected. Confirming calls `onSelect` with the chosen
 * price and method and shows a spinner until that call settles.
 *
 * Renders nothing while `isOpen` is false.
 */
export default function PriceSelectionModal({
  isOpen,
  onClose,
  onSelect,
  productName,
  imageUrl,
  candidates,
  suggestedPrice,
  url,
}: PriceSelectionModalProps) {
  // Index (into `candidates`) the user explicitly clicked. `null` means "no
  // explicit choice yet", in which case the default below applies. The default
  // is deliberately NOT stored in state: this component can stay mounted while
  // closed, and a useState initializer only runs on the first mount, so it
  // would be computed from whatever props were passed before any real
  // candidates arrived.
  const [pickedIndex, setPickedIndex] = useState<number | null>(null);
  const [isSubmitting, setIsSubmitting] = useState(false);

  if (!isOpen) return null;

  // Default selection, derived from the current props on every render.
  let defaultIndex: number | null = null;
  if (suggestedPrice) {
    const suggestedIndex = candidates.findIndex(c => c.price === suggestedPrice.price);
    // findIndex yields -1 when the suggestion matches nothing; treat that as
    // "nothing selected" so the Confirm button is disabled rather than inert.
    defaultIndex = suggestedIndex >= 0 ? suggestedIndex : null;
  } else if (candidates.length > 0) {
    defaultIndex = 0;
  }

  // An explicit pick wins as long as it still points inside the current list.
  const selectedIndex =
    pickedIndex !== null && pickedIndex < candidates.length ? pickedIndex : defaultIndex;
  const selected: PriceCandidate | undefined =
    selectedIndex === null ? undefined : candidates[selectedIndex];

  const handleSelect = async () => {
    if (!selected) return;
    setIsSubmitting(true);
    try {
      await onSelect(selected.price, selected.method);
      // Forget the pick so the next opening starts from its own default.
      setPickedIndex(null);
    } finally {
      setIsSubmitting(false);
    }
  };

  const handleCancel = () => {
    // Forget the pick so a later opening does not inherit it.
    setPickedIndex(null);
    onClose();
  };

  const formatPrice = (price: number, currency: string) => {
    const symbol = currency === 'EUR' ? '€' : currency === 'GBP' ? '£' : currency === 'CHF' ? 'CHF ' : '$';
    return `${symbol}${price.toFixed(2)}`;
  };

  const getConfidenceLabel = (confidence: number) => {
    if (confidence >= 0.8) return 'High';
    if (confidence >= 0.6) return 'Medium';
    return 'Low';
  };

  const getConfidenceColor = (confidence: number) => {
    if (confidence >= 0.8) return '#10b981';
    if (confidence >= 0.6) return '#f59e0b';
    return '#6b7280';
  };

  // Sort candidates by confidence (highest first), carrying each candidate's
  // position in the original array so selection is tracked by that index.
  const sortedCandidates = candidates
    .map((candidate, originalIndex) => ({ candidate, originalIndex }))
    .sort((a, b) => b.candidate.confidence - a.candidate.confidence);

  return (
    <div className="price-modal-overlay">
      <style>{`
.price-modal-overlay {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.5);
backdrop-filter: blur(4px);
display: flex;
align-items: center;
justify-content: center;
z-index: 1000;
padding: 1rem;
}
.price-modal {
background: var(--surface);
border-radius: 1rem;
box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
max-width: 500px;
width: 100%;
max-height: 90vh;
overflow: hidden;
display: flex;
flex-direction: column;
}
.price-modal-header {
padding: 1.5rem;
border-bottom: 1px solid var(--border);
}
.price-modal-title {
font-size: 1.25rem;
font-weight: 600;
color: var(--text);
margin: 0 0 0.5rem 0;
}
.price-modal-subtitle {
font-size: 0.875rem;
color: var(--text-muted);
margin: 0;
}
.price-modal-product {
display: flex;
gap: 1rem;
padding: 1rem 1.5rem;
background: var(--background);
border-bottom: 1px solid var(--border);
}
.price-modal-product-image {
width: 64px;
height: 64px;
object-fit: contain;
border-radius: 0.5rem;
background: white;
}
.price-modal-product-info {
flex: 1;
min-width: 0;
}
.price-modal-product-name {
font-weight: 500;
color: var(--text);
margin: 0 0 0.25rem 0;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.price-modal-product-url {
font-size: 0.75rem;
color: var(--text-muted);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.price-modal-body {
padding: 1rem 1.5rem;
overflow-y: auto;
flex: 1;
}
.price-candidates-list {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
.price-candidate {
border: 2px solid var(--border);
border-radius: 0.75rem;
padding: 1rem;
cursor: pointer;
transition: all 0.2s;
position: relative;
}
.price-candidate:hover {
border-color: var(--primary);
background: var(--background);
}
.price-candidate.selected {
border-color: var(--primary);
background: rgba(99, 102, 241, 0.1);
}
.price-candidate-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 0.5rem;
}
.price-candidate-price {
font-size: 1.25rem;
font-weight: 700;
color: var(--text);
}
.price-candidate-confidence {
font-size: 0.75rem;
font-weight: 500;
padding: 0.25rem 0.5rem;
border-radius: 9999px;
background: var(--background);
}
.price-candidate-method {
font-size: 0.875rem;
font-weight: 500;
color: var(--text);
margin-bottom: 0.25rem;
}
.price-candidate-context {
font-size: 0.75rem;
color: var(--text-muted);
}
.price-candidate-check {
position: absolute;
top: 0.75rem;
right: 0.75rem;
width: 24px;
height: 24px;
border-radius: 50%;
background: var(--primary);
color: white;
display: flex;
align-items: center;
justify-content: center;
opacity: 0;
transition: opacity 0.2s;
}
.price-candidate.selected .price-candidate-check {
opacity: 1;
}
.price-modal-footer {
padding: 1rem 1.5rem;
border-top: 1px solid var(--border);
display: flex;
gap: 0.75rem;
justify-content: flex-end;
}
.price-modal-footer .btn {
min-width: 100px;
}
`}</style>
      <div className="price-modal">
        <div className="price-modal-header">
          <h2 className="price-modal-title">Multiple Prices Found</h2>
          <p className="price-modal-subtitle">
            We found different prices for this product. Please select the correct one.
          </p>
        </div>
        <div className="price-modal-product">
          {imageUrl && (
            <img src={imageUrl} alt="" className="price-modal-product-image" />
          )}
          <div className="price-modal-product-info">
            <p className="price-modal-product-name">{productName || 'Unknown Product'}</p>
            <p className="price-modal-product-url">{url}</p>
          </div>
        </div>
        <div className="price-modal-body">
          <div className="price-candidates-list">
            {sortedCandidates.map(({ candidate, originalIndex }) => (
              <div
                key={originalIndex}
                className={`price-candidate ${selectedIndex === originalIndex ? 'selected' : ''}`}
                onClick={() => setPickedIndex(originalIndex)}
              >
                <div className="price-candidate-check">
                  <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="3" strokeLinecap="round" strokeLinejoin="round">
                    <polyline points="20 6 9 17 4 12" />
                  </svg>
                </div>
                <div className="price-candidate-header">
                  <span className="price-candidate-price">
                    {formatPrice(candidate.price, candidate.currency)}
                  </span>
                  <span
                    className="price-candidate-confidence"
                    style={{ color: getConfidenceColor(candidate.confidence) }}
                  >
                    {getConfidenceLabel(candidate.confidence)} confidence
                  </span>
                </div>
                <div className="price-candidate-method">
                  {METHOD_LABELS[candidate.method] || candidate.method}
                </div>
                <div className="price-candidate-context">
                  {candidate.context || METHOD_DESCRIPTIONS[candidate.method] || 'No additional context'}
                </div>
              </div>
            ))}
          </div>
        </div>
        <div className="price-modal-footer">
          <button className="btn btn-secondary" onClick={handleCancel} disabled={isSubmitting}>
            Cancel
          </button>
          <button
            className="btn btn-primary"
            onClick={handleSelect}
            disabled={!selected || isSubmitting}
          >
            {isSubmitting ? <span className="spinner" /> : 'Confirm Selection'}
          </button>
        </div>
      </div>
    </div>
  );
}

View file

@ -2,7 +2,13 @@ import { useState, useEffect, useMemo } from 'react';
import Layout from '../components/Layout';
import ProductCard from '../components/ProductCard';
import ProductForm from '../components/ProductForm';
import { productsApi, pricesApi, Product } from '../api/client';
import PriceSelectionModal from '../components/PriceSelectionModal';
import { productsApi, pricesApi, Product, PriceReviewResponse } from '../api/client';
/**
 * Type guard: tells a price-review response apart from a created product.
 *
 * @param response - Payload returned by the product-creation endpoint.
 * @returns `true` when the payload carries `needsReview: true`.
 */
function isPriceReviewResponse(response: Product | PriceReviewResponse): response is PriceReviewResponse {
  if (!('needsReview' in response)) {
    return false;
  }
  return response.needsReview === true;
}
/** Product attribute the dashboard list can be sorted by. */
type SortOption =
  | 'date_added'
  | 'name'
  | 'price'
  | 'price_change'
  | 'website';

/** Sort direction: ascending or descending. */
type SortOrder =
  | 'asc'
  | 'desc';
@ -33,6 +39,11 @@ export default function Dashboard() {
const [isSavingBulk, setIsSavingBulk] = useState(false);
const [showBulkActions, setShowBulkActions] = useState(false);
// Price selection modal state
const [showPriceModal, setShowPriceModal] = useState(false);
const [priceReviewData, setPriceReviewData] = useState<PriceReviewResponse | null>(null);
const [pendingRefreshInterval, setPendingRefreshInterval] = useState<number>(3600);
const fetchProducts = async () => {
try {
const response = await productsApi.getAll();
@ -58,7 +69,40 @@ export default function Dashboard() {
/**
 * Submits a new product URL for tracking.
 *
 * When the API answers with a price-review response, the review data and the
 * requested refresh interval are stored and the price selection modal is
 * opened; nothing is added to the product list until the user has chosen a
 * price. Otherwise the created product is prepended to the list exactly once.
 */
const handleAddProduct = async (url: string, refreshInterval: number) => {
  const response = await productsApi.create(url, refreshInterval);
  // Hold the payload in a const so the type guard's narrowing also applies
  // inside the state-updater callback below (no cast needed).
  const result = response.data;

  // Check if we need user to select a price
  if (isPriceReviewResponse(result)) {
    setPriceReviewData(result);
    setPendingRefreshInterval(refreshInterval);
    setShowPriceModal(true);
    return; // Don't add product yet - wait for user selection
  }

  // result is a Product at this point
  setProducts((prev) => [result, ...prev]);
};
/**
 * Re-submits the pending product with the price and extraction method the
 * user picked in the modal, then closes the modal and clears the review data.
 */
const handlePriceSelected = async (selectedPrice: number, selectedMethod: string) => {
  if (priceReviewData === null) {
    return;
  }

  const { data: created } = await productsApi.create(
    priceReviewData.url,
    pendingRefreshInterval,
    selectedPrice,
    selectedMethod
  );

  // When selecting a price, the API should always return a Product;
  // anything else is not added to the list.
  const needsAnotherReview = isPriceReviewResponse(created);
  if (!needsAnotherReview) {
    setProducts((prev) => [created as Product, ...prev]);
  }

  setShowPriceModal(false);
  setPriceReviewData(null);
};
/** Dismisses the price selection modal and discards the pending review data. */
const handlePriceModalClose = (): void => {
  // Hide the dialog first, then drop the data it was showing.
  setShowPriceModal(false);
  setPriceReviewData(null);
};
const handleDeleteProduct = async (id: number) => {
@ -641,6 +685,18 @@ export default function Dashboard() {
<ProductForm onSubmit={handleAddProduct} />
{/* Price Selection Modal */}
<PriceSelectionModal
isOpen={showPriceModal}
onClose={handlePriceModalClose}
onSelect={handlePriceSelected}
productName={priceReviewData?.name || null}
imageUrl={priceReviewData?.imageUrl || null}
candidates={priceReviewData?.priceCandidates || []}
suggestedPrice={priceReviewData?.suggestedPrice || null}
url={priceReviewData?.url || ''}
/>
{error && <div className="alert alert-error">{error}</div>}
{/* Dashboard Summary */}