feat: Multi-strategy price voting system with user selection

- Add multi-strategy voting: runs JSON-LD, site-specific, generic CSS, and AI extraction methods in parallel
- Implement consensus voting to select the correct price when methods agree
- Add AI arbitration when extraction methods disagree
- Add PriceSelectionModal for users to select correct price when ambiguous
- Store preferred extraction method per product for faster re-checks
- Add database columns for preferred_extraction_method and needs_price_review

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-08 15:05:16 +02:00 · 2026-01-24 14:45:51 -05:00 · 2026-01-24 14:45:51 -05:00 · 4fd04cd160
commit 4fd04cd160
parent 40c45b49c8
10 changed files with 1259 additions and 12 deletions
--- a/backend/src/index.ts
+++ b/backend/src/index.ts
@ -151,6 +151,22 @@ async function runMigrations() {
      END $$;
    `);

+    // Add multi-strategy voting columns to products table
+    await client.query(`
+      DO $$
+      BEGIN
+        IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'preferred_extraction_method') THEN
+          ALTER TABLE products ADD COLUMN preferred_extraction_method VARCHAR(20);
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'needs_price_review') THEN
+          ALTER TABLE products ADD COLUMN needs_price_review BOOLEAN DEFAULT false;
+        END IF;
+        IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'products' AND column_name = 'price_candidates') THEN
+          ALTER TABLE products ADD COLUMN price_candidates JSONB;
+        END IF;
+      END $$;
+    `);
+
    // Create notification_history table for tracking all triggered notifications
    await client.query(`
      CREATE TABLE IF NOT EXISTS notification_history (
--- a/backend/src/models/index.ts
+++ b/backend/src/models/index.ts
@ -551,6 +551,21 @@ export const productQueries = {
    );
    return result.rows;
  },
+
+  /**
+   * Persist the extraction method to prefer for a product. The same statement
+   * also resets the product's needs_price_review flag to false.
+   */
+  updateExtractionMethod: async (id: number, method: string): Promise<void> => {
+    const sql =
+      'UPDATE products SET preferred_extraction_method = $1, needs_price_review = false WHERE id = $2';
+    const params = [method, id];
+    await pool.query(sql, params);
+  },
+
+  /**
+   * Look up the preferred extraction method stored for a product.
+   * Resolves to null when no row matches or the stored value is falsy.
+   */
+  getPreferredExtractionMethod: async (id: number): Promise<string | null> => {
+    const { rows } = await pool.query(
+      'SELECT preferred_extraction_method FROM products WHERE id = $1',
+      [id]
+    );
+    const [firstRow] = rows;
+    return firstRow?.preferred_extraction_method || null;
+  },
 };

 // Price History types and queries
--- a/backend/src/routes/products.ts
+++ b/backend/src/routes/products.ts
@ -1,7 +1,7 @@
 import { Router, Response } from 'express';
 import { AuthRequest, authMiddleware } from '../middleware/auth';
 import { productQueries, priceHistoryQueries, stockStatusHistoryQueries } from '../models';
-import { scrapeProduct } from '../services/scraper';
+import { scrapeProduct, scrapeProductWithVoting, ExtractionMethod } from '../services/scraper';

 const router = Router();

@ -20,11 +20,11 @@ router.get('/', async (req: AuthRequest, res: Response) => {
  }
 });

-// Add a new product to track
+// Add a new product to track (with multi-strategy voting)
 router.post('/', async (req: AuthRequest, res: Response) => {
  try {
    const userId = req.userId!;
-    const { url, refresh_interval } = req.body;
+    const { url, refresh_interval, selectedPrice, selectedMethod } = req.body;

    if (!url) {
      res.status(400).json({ error: 'URL is required' });
@ -39,8 +39,47 @@ router.post('/', async (req: AuthRequest, res: Response) => {
      return;
    }

-    // Scrape product info (pass userId for AI fallback)
-    const scrapedData = await scrapeProduct(url, userId);
+    // If user is confirming a price selection, use the old scraper with their choice
+    if (selectedPrice !== undefined && selectedMethod) {
+      // User has selected a price from candidates - use it directly
+      const scrapedData = await scrapeProduct(url, userId);
+
+      // Create product with the user-selected price
+      const product = await productQueries.create(
+        userId,
+        url,
+        scrapedData.name,
+        scrapedData.imageUrl,
+        refresh_interval || 3600,
+        scrapedData.stockStatus
+      );
+
+      // Store the preferred extraction method and the user-selected price
+      await productQueries.updateExtractionMethod(product.id, selectedMethod);
+
+      // Record the user-selected price
+      await priceHistoryQueries.create(
+        product.id,
+        selectedPrice,
+        'USD', // TODO: Get currency from selection
+        null
+      );
+
+      // Record initial stock status
+      if (scrapedData.stockStatus !== 'unknown') {
+        await stockStatusHistoryQueries.recordChange(product.id, scrapedData.stockStatus);
+      }
+
+      // Update last_checked timestamp
+      await productQueries.updateLastChecked(product.id, product.refresh_interval);
+
+      const productWithPrice = await productQueries.findById(product.id, userId);
+      res.status(201).json(productWithPrice);
+      return;
+    }
+
+    // Use multi-strategy voting scraper
+    const scrapedData = await scrapeProductWithVoting(url, userId);

    // Allow adding out-of-stock products, but require a price for in-stock ones
    if (!scrapedData.price && scrapedData.stockStatus !== 'out_of_stock') {
@ -50,6 +89,26 @@ router.post('/', async (req: AuthRequest, res: Response) => {
      return;
    }

+    // If needsReview is true and there are multiple candidates, return them for user selection
+    if (scrapedData.needsReview && scrapedData.priceCandidates.length > 1) {
+      res.status(200).json({
+        needsReview: true,
+        name: scrapedData.name,
+        imageUrl: scrapedData.imageUrl,
+        stockStatus: scrapedData.stockStatus,
+        priceCandidates: scrapedData.priceCandidates.map(c => ({
+          price: c.price,
+          currency: c.currency,
+          method: c.method,
+          context: c.context,
+          confidence: c.confidence,
+        })),
+        suggestedPrice: scrapedData.price,
+        url,
+      });
+      return;
+    }
+
    // Create product with stock status
    const product = await productQueries.create(
      userId,
@ -60,6 +119,11 @@ router.post('/', async (req: AuthRequest, res: Response) => {
      scrapedData.stockStatus
    );

+    // Store the extraction method that worked
+    if (scrapedData.selectedMethod) {
+      await productQueries.updateExtractionMethod(product.id, scrapedData.selectedMethod);
+    }
+
    // Record initial price if available
    if (scrapedData.price) {
      await priceHistoryQueries.create(
--- a/backend/src/services/ai-extractor.ts
+++ b/backend/src/services/ai-extractor.ts
@ -4,7 +4,7 @@ import axios from 'axios';
 import { load } from 'cheerio';
 import { AISettings } from '../models';
 import { ParsedPrice } from '../utils/priceParser';
-import { StockStatus } from './scraper';
+import { StockStatus, PriceCandidate } from './scraper';

 export interface AIExtractionResult {
  name: string | null;
@ -548,3 +548,211 @@ export async function tryAIVerification(
    return null;
  }
 }
+
+// Arbitration prompt for when multiple extraction methods disagree.
+// `$CANDIDATES$` is a placeholder that the arbitrateWith* functions replace with the
+// indexed candidate list; they then append the prepared page HTML after the final
+// "HTML Content:" line. The model is asked to answer with JSON containing
+// selectedIndex, confidence and reason.
+const ARBITRATION_PROMPT = `You are a price arbitration assistant. Multiple price extraction methods found different prices for the same product. Help determine the correct price.
+
+Found prices:
+$CANDIDATES$
+
+Analyze the HTML content below and determine which price is the correct CURRENT selling price for the main product.
+
+Consider:
+- JSON-LD structured data is usually highly reliable (schema.org standard)
+- Site-specific extractors are well-tested for major retailers
+- Generic CSS selectors might catch wrong prices (shipping, savings, bundles, etc.)
+- Look for the price that appears in the main product display area
+- Ignore crossed-out/original prices, shipping costs, subscription prices, or bundle prices
+
+Return a JSON object with:
+- selectedIndex: the 0-based index of the correct price from the list above
+- confidence: your confidence from 0 to 1
+- reason: brief explanation of why this price is correct
+
+Only return valid JSON, no explanation text outside the JSON.
+
+HTML Content:
+`;
+
+/** Outcome of asking an AI provider to choose among conflicting price candidates. */
+export interface AIArbitrationResult {
+  // Candidate chosen by the AI, or null when no valid choice could be read from the response
+  selectedPrice: PriceCandidate | null;
+  // Confidence score for the choice (0 when no candidate was selected)
+  confidence: number;
+  // Short explanation for the choice, or a fallback message
+  reason: string;
+}
+
+/**
+ * Ask Anthropic to pick the correct price among conflicting candidates.
+ *
+ * @param html - Raw page HTML; run through prepareHtmlForAI before being appended to the prompt.
+ * @param candidates - Candidate prices, presented to the model as a 0-based indexed list.
+ * @param apiKey - Anthropic API key.
+ * @returns The arbitration result produced by parseArbitrationResponse.
+ * @throws Error when the response has no first content block or it is not text;
+ *         errors from the Anthropic client propagate unchanged.
+ */
+async function arbitrateWithAnthropic(
+  html: string,
+  candidates: PriceCandidate[],
+  apiKey: string
+): Promise<AIArbitrationResult> {
+  const anthropic = new Anthropic({ apiKey });
+
+  const candidatesList = candidates.map((c, i) =>
+    `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
+  ).join('\n');
+
+  const preparedHtml = prepareHtmlForAI(html);
+  // Use a replacer function: with a plain string replacement, sequences such as `$$`, `$&`
+  // or `$'` inside the scraped candidate context would be expanded as replacement patterns
+  // and corrupt the prompt.
+  const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;
+
+  const response = await anthropic.messages.create({
+    model: 'claude-3-haiku-20240307',
+    max_tokens: 512,
+    messages: [{ role: 'user', content: prompt }],
+  });
+
+  // An empty content array is reported the same way as a non-text block
+  const content = response.content[0];
+  if (!content || content.type !== 'text') {
+    throw new Error('Unexpected response type from Anthropic');
+  }
+
+  return parseArbitrationResponse(content.text, candidates);
+}
+
+/**
+ * Ask OpenAI to pick the correct price among conflicting candidates.
+ *
+ * @param html - Raw page HTML; run through prepareHtmlForAI before being appended to the prompt.
+ * @param candidates - Candidate prices, presented to the model as a 0-based indexed list.
+ * @param apiKey - OpenAI API key.
+ * @returns The arbitration result produced by parseArbitrationResponse.
+ * @throws Error when the first choice carries no message content;
+ *         errors from the OpenAI client propagate unchanged.
+ */
+async function arbitrateWithOpenAI(
+  html: string,
+  candidates: PriceCandidate[],
+  apiKey: string
+): Promise<AIArbitrationResult> {
+  const openai = new OpenAI({ apiKey });
+
+  const candidatesList = candidates.map((c, i) =>
+    `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
+  ).join('\n');
+
+  const preparedHtml = prepareHtmlForAI(html);
+  // Use a replacer function: with a plain string replacement, sequences such as `$$`, `$&`
+  // or `$'` inside the scraped candidate context would be expanded as replacement patterns
+  // and corrupt the prompt.
+  const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;
+
+  const response = await openai.chat.completions.create({
+    model: 'gpt-4o-mini',
+    max_tokens: 512,
+    messages: [{ role: 'user', content: prompt }],
+  });
+
+  const content = response.choices[0]?.message?.content;
+  if (!content) {
+    throw new Error('No response from OpenAI');
+  }
+
+  return parseArbitrationResponse(content, candidates);
+}
+
+/**
+ * Ask an Ollama server to pick the correct price among conflicting candidates.
+ *
+ * @param html - Raw page HTML; run through prepareHtmlForAI before being appended to the prompt.
+ * @param candidates - Candidate prices, presented to the model as a 0-based indexed list.
+ * @param baseUrl - Base URL of the Ollama server; `/api/chat` is appended to it.
+ * @param model - Name of the Ollama model to query.
+ * @returns The arbitration result produced by parseArbitrationResponse.
+ * @throws Error when the reply carries no message content; axios errors
+ *         (including the 120 s timeout) propagate unchanged.
+ */
+async function arbitrateWithOllama(
+  html: string,
+  candidates: PriceCandidate[],
+  baseUrl: string,
+  model: string
+): Promise<AIArbitrationResult> {
+  const candidatesList = candidates.map((c, i) =>
+    `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
+  ).join('\n');
+
+  const preparedHtml = prepareHtmlForAI(html);
+  // Use a replacer function: with a plain string replacement, sequences such as `$$`, `$&`
+  // or `$'` inside the scraped candidate context would be expanded as replacement patterns
+  // and corrupt the prompt.
+  const prompt = ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;
+
+  const response = await axios.post(
+    `${baseUrl}/api/chat`,
+    {
+      model: model,
+      messages: [{ role: 'user', content: prompt }],
+      stream: false,
+    },
+    {
+      headers: { 'Content-Type': 'application/json' },
+      timeout: 120000,
+    }
+  );
+
+  const content = response.data?.message?.content;
+  if (!content) {
+    throw new Error('No response from Ollama');
+  }
+
+  return parseArbitrationResponse(content, candidates);
+}
+
+/**
+ * Turn an AI provider's raw text reply into an AIArbitrationResult.
+ *
+ * The reply may be bare JSON, JSON inside a markdown code fence, or JSON
+ * surrounded by other text; the first `{ ... }` span found is parsed.
+ *
+ * @param responseText - Raw text returned by the provider.
+ * @param candidates - The candidates that were listed in the prompt, in the same order.
+ * @returns The selected candidate with confidence and reason. When the reply cannot be
+ *          parsed, or selectedIndex is not a whole number inside the candidate list, a
+ *          default result is returned with selectedPrice null and confidence 0.
+ */
+function parseArbitrationResponse(
+  responseText: string,
+  candidates: PriceCandidate[]
+): AIArbitrationResult {
+  console.log(`[AI Arbitrate] Raw response: ${responseText.substring(0, 500)}...`);
+
+  const defaultResult: AIArbitrationResult = {
+    selectedPrice: null,
+    confidence: 0,
+    reason: 'Could not parse AI response',
+  };
+
+  let jsonStr = responseText.trim();
+
+  // Handle markdown code blocks
+  const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (jsonMatch) {
+    jsonStr = jsonMatch[1].trim();
+  }
+
+  // Try to find JSON object
+  const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
+  if (objectMatch) {
+    jsonStr = objectMatch[0];
+  }
+
+  try {
+    const data: unknown = JSON.parse(jsonStr);
+    console.log(`[AI Arbitrate] Parsed:`, JSON.stringify(data, null, 2));
+
+    // JSON.parse can legally yield null or a primitive; only an object can hold the fields
+    if (typeof data !== 'object' || data === null) {
+      return defaultResult;
+    }
+    const { selectedIndex, confidence, reason } = data as Record<string, unknown>;
+
+    // The index must be a whole number: a fractional value such as 1.5 would pass a plain
+    // range check and then read `undefined` out of the candidates array.
+    if (
+      typeof selectedIndex !== 'number' ||
+      !Number.isInteger(selectedIndex) ||
+      selectedIndex < 0 ||
+      selectedIndex >= candidates.length
+    ) {
+      return defaultResult;
+    }
+
+    // Model output is untrusted: accept only a finite number for confidence and clamp it
+    // to the documented 0..1 range; fall back to 0.7 otherwise.
+    const safeConfidence =
+      typeof confidence === 'number' && Number.isFinite(confidence)
+        ? Math.min(1, Math.max(0, confidence))
+        : 0.7;
+
+    return {
+      selectedPrice: candidates[selectedIndex],
+      confidence: safeConfidence,
+      reason: typeof reason === 'string' && reason ? reason : 'AI selected this price',
+    };
+  } catch (error) {
+    console.error('[AI Arbitrate] Failed to parse response:', responseText);
+    return defaultResult;
+  }
+}
+
+/**
+ * Entry point used by the voting scraper when extraction methods disagree:
+ * asks the user's configured AI provider to choose among the candidates.
+ *
+ * Resolves to null when neither AI flag is enabled for the user, when fewer
+ * than two candidates are supplied, when no provider is fully configured, or
+ * when anything throws along the way (the error is logged, not rethrown).
+ */
+export async function tryAIArbitration(
+  url: string,
+  html: string,
+  candidates: PriceCandidate[],
+  userId: number
+): Promise<AIArbitrationResult | null> {
+  try {
+    const models = await import('../models');
+    const settings = await models.userQueries.getAISettings(userId);
+
+    // Either AI flag being switched on is enough to allow arbitration
+    if (!settings?.ai_enabled && !settings?.ai_verification_enabled) return null;
+
+    // With zero or one candidate there is nothing to arbitrate
+    if (candidates.length < 2) return null;
+
+    const provider = settings.ai_provider;
+
+    // Dispatch to the configured provider, provided its credentials are present
+    if (provider === 'anthropic' && settings.anthropic_api_key) {
+      console.log(`[AI Arbitrate] Using Anthropic to arbitrate ${candidates.length} prices for ${url}`);
+      return await arbitrateWithAnthropic(html, candidates, settings.anthropic_api_key);
+    }
+    if (provider === 'openai' && settings.openai_api_key) {
+      console.log(`[AI Arbitrate] Using OpenAI to arbitrate ${candidates.length} prices for ${url}`);
+      return await arbitrateWithOpenAI(html, candidates, settings.openai_api_key);
+    }
+    if (provider === 'ollama' && settings.ollama_base_url && settings.ollama_model) {
+      console.log(`[AI Arbitrate] Using Ollama to arbitrate ${candidates.length} prices for ${url}`);
+      return await arbitrateWithOllama(html, candidates, settings.ollama_base_url, settings.ollama_model);
+    }
+
+    console.log(`[AI Arbitrate] No provider configured`);
+    return null;
+  } catch (error) {
+    console.error(`[AI Arbitrate] Arbitration failed for ${url}:`, error);
+    return null;
+  }
+}
--- a/backend/src/services/scheduler.ts
+++ b/backend/src/services/scheduler.ts
@ -1,6 +1,6 @@
 import cron from 'node-cron';
 import { productQueries, priceHistoryQueries, userQueries, stockStatusHistoryQueries, notificationHistoryQueries, NotificationType } from '../models';
-import { scrapeProduct } from './scraper';
+import { scrapeProduct, scrapeProductWithVoting, ExtractionMethod } from './scraper';
 import { sendNotifications, NotificationPayload } from './notifications';

 let isRunning = false;
@ -23,7 +23,15 @@ async function checkPrices(): Promise<void> {
      try {
        console.log(`Checking price for product ${product.id}: ${product.url}`);

-        const scrapedData = await scrapeProduct(product.url, product.user_id);
+        // Get preferred extraction method for this product (if user previously selected one)
+        const preferredMethod = await productQueries.getPreferredExtractionMethod(product.id);
+
+        // Use voting scraper with preferred method if available
+        const scrapedData = await scrapeProductWithVoting(
+          product.url,
+          product.user_id,
+          preferredMethod as ExtractionMethod | undefined
+        );

        // Check for back-in-stock notification
        const wasOutOfStock = product.stock_status === 'out_of_stock';
--- a/backend/src/services/scraper.ts
+++ b/backend/src/services/scraper.ts
@ -13,6 +13,220 @@ puppeteer.use(StealthPlugin());

 export type StockStatus = 'in_stock' | 'out_of_stock' | 'unknown';

+// Extraction method types for multi-strategy voting.
+// Each PriceCandidate is tagged with one of these to record which extractor produced it.
+export type ExtractionMethod = 'json-ld' | 'site-specific' | 'generic-css' | 'ai';
+
+// Price candidate from a single extraction method.
+// Candidates from all methods are pooled and compared to find a consensus price.
+export interface PriceCandidate {
+  // Numeric price value
+  price: number;
+  // Currency code, e.g. 'USD'
+  currency: string;
+  // Extractor that produced this candidate
+  method: ExtractionMethod;
+  context?: string; // Text around the price for user context
+  confidence: number; // 0-1 confidence score
+}
+
+// Extended scrape result with candidates for voting.
+// Returned by the voting scraper: the chosen price plus every candidate considered.
+export interface ScrapedProductWithCandidates {
+  name: string | null;
+  // Price chosen for the product, or null when none was found
+  price: ParsedPrice | null;
+  imageUrl: string | null;
+  // URL that was scraped
+  url: string;
+  stockStatus: StockStatus;
+  aiStatus: 'verified' | 'corrected' | null;
+  // Every candidate collected from the extraction methods
+  priceCandidates: PriceCandidate[];
+  // True when the price could not be settled automatically and the user should pick one
+  needsReview: boolean;
+  selectedMethod?: ExtractionMethod; // Which method was used for final price
+}
+
+// Decide whether two prices count as the same value: either identical, or
+// differing by less than 5% of their mean.
+function pricesMatch(price1: number, price2: number): boolean {
+  if (price1 === price2) {
+    return true;
+  }
+  const mean = (price1 + price2) / 2;
+  const gap = Math.abs(price1 - price2);
+  return gap / mean < 0.05;
+}
+
+/**
+ * Find consensus among price candidates.
+ *
+ * Candidates are clustered into groups of matching prices (see pricesMatch; each
+ * candidate joins the first group whose first member it matches). Groups are ranked
+ * by size, then by average confidence.
+ *
+ * @param candidates - Candidates collected from all extraction methods.
+ * @returns price: the highest-confidence candidate of the top-ranked group (null when
+ *          there are no candidates); hasConsensus: true only when the top group has
+ *          strictly more votes than every other group; groups: the ranked groups.
+ */
+function findPriceConsensus(candidates: PriceCandidate[]): { price: PriceCandidate | null; hasConsensus: boolean; groups: PriceCandidate[][] } {
+  if (candidates.length === 0) return { price: null, hasConsensus: false, groups: [] };
+  if (candidates.length === 1) return { price: candidates[0], hasConsensus: true, groups: [[candidates[0]]] };
+
+  // Group prices that match
+  const groups: PriceCandidate[][] = [];
+  for (const candidate of candidates) {
+    const group = groups.find((g) => pricesMatch(candidate.price, g[0].price));
+    if (group) {
+      group.push(candidate);
+    } else {
+      groups.push([candidate]);
+    }
+  }
+
+  // Sort groups by size (most votes first), then by confidence
+  const averageConfidence = (group: PriceCandidate[]): number =>
+    group.reduce((sum, c) => sum + c.confidence, 0) / group.length;
+  groups.sort((a, b) => {
+    if (b.length !== a.length) return b.length - a.length;
+    return averageConfidence(b) - averageConfidence(a);
+  });
+
+  const largestGroup = groups[0];
+  const runnerUp = groups[1];
+
+  // Consensus requires a strict winner. A tie for first place (e.g. two methods that
+  // disagree 1-1, or a 2-2 split) is exactly the ambiguous case that must fall through
+  // to AI arbitration or user review, so it must not be reported as consensus.
+  // A strict majority always beats the runner-up, so this single test covers it too.
+  const hasConsensus = runnerUp === undefined || largestGroup.length > runnerUp.length;
+
+  // Pick the highest confidence candidate from the winning group (first one wins on ties)
+  const winner = largestGroup.reduce((best, c) => (c.confidence > best.confidence ? c : best));
+
+  return { price: winner, hasConsensus, groups };
+}
+
+/**
+ * Extract price candidates from JSON-LD structured data.
+ *
+ * Every `<script type="application/ld+json">` element is parsed independently; for each
+ * one that contains a product with offers, the first offer's lowPrice, price or
+ * priceSpecification.price becomes a candidate with confidence 0.9.
+ *
+ * @param $ - Loaded cheerio document.
+ * @returns One candidate per script that yields a positive numeric price; empty when none do.
+ */
+function extractJsonLdCandidates($: CheerioAPI): PriceCandidate[] {
+  const candidates: PriceCandidate[] = [];
+  const scripts = $('script[type="application/ld+json"]');
+
+  for (let i = 0; i < scripts.length; i++) {
+    // Guard each script separately: a single malformed JSON-LD block must not
+    // discard valid product data in the scripts that follow it.
+    try {
+      const content = $(scripts[i]).html();
+      if (!content) continue;
+
+      const data = JSON.parse(content) as JsonLdProduct | JsonLdProduct[];
+      const product = findProduct(data);
+      if (!product?.offers) continue;
+
+      const offer = Array.isArray(product.offers) ? product.offers[0] : product.offers;
+      // An empty offers array leaves nothing to read
+      if (!offer) continue;
+
+      const priceValue = offer.lowPrice || offer.price || offer.priceSpecification?.price;
+      const currency = offer.priceCurrency || offer.priceSpecification?.priceCurrency || 'USD';
+      if (!priceValue) continue;
+
+      const price = parseFloat(String(priceValue));
+      if (!isNaN(price) && price > 0) {
+        candidates.push({
+          price,
+          currency,
+          method: 'json-ld',
+          context: `Structured data: ${product.name || 'Product'}`,
+          confidence: 0.9, // JSON-LD is highly reliable
+        });
+      }
+    } catch (_e) {
+      // Malformed JSON or unexpected shape in this script - skip it and keep going
+    }
+  }
+
+  return candidates;
+}
+
+// Run the first site-specific scraper whose matcher accepts the URL and report its
+// price (as a single candidate) together with the name, image and stock status it found.
+// When no scraper matches, returns no candidates, null metadata and 'unknown' stock status.
+function extractSiteSpecificCandidates($: CheerioAPI, url: string): { candidates: PriceCandidate[]; name: string | null; imageUrl: string | null; stockStatus: StockStatus } {
+  const matchedScraper = siteScrapers.find((s) => s.match(url));
+  if (!matchedScraper) {
+    return { candidates: [], name: null, imageUrl: null, stockStatus: 'unknown' };
+  }
+
+  const scraped = matchedScraper.scrape($, url);
+  const candidates: PriceCandidate[] = [];
+
+  if (scraped.price) {
+    const { price, currency } = scraped.price;
+    candidates.push({
+      price,
+      currency,
+      method: 'site-specific',
+      context: `Site-specific extractor for ${new URL(url).hostname}`,
+      confidence: 0.85, // Site-specific scrapers are well-tested
+    });
+  }
+
+  return {
+    candidates,
+    name: scraped.name || null,
+    imageUrl: scraped.imageUrl || null,
+    stockStatus: scraped.stockStatus || 'unknown',
+  };
+}
+
+// Extract price candidates from generic CSS selectors.
+//
+// Walks genericPriceSelectors in order and inspects every element each selector matches.
+// For each element the price is read from data-price-amount when that holds a positive
+// number, otherwise from the content attribute, the data-price attribute or the element
+// text via parsePrice. Each distinct numeric price is reported once, with confidence 0.6.
+// Returns the collected candidates (possibly empty).
+function extractGenericCssCandidates($: CheerioAPI): PriceCandidate[] {
+  const candidates: PriceCandidate[] = [];
+  // Prices already reported, so the same value is not added twice across selectors
+  const seen = new Set<number>();
+
+  for (const selector of genericPriceSelectors) {
+    const elements = $(selector);
+    elements.each((_, el) => {
+      const $el = $(el);
+      // Skip if this looks like an "original" or "was" price
+      // NOTE(review): the two class strings are concatenated without a separator and the
+      // pattern is an unanchored substring match, so classes like "bold" or "gold" (contain
+      // "old") also cause a skip - confirm this is intended.
+      const classAttr = $el.attr('class') || '';
+      const parentClass = $el.parent().attr('class') || '';
+      if (/original|was|old|regular|compare|strikethrough|line-through/i.test(classAttr + parentClass)) {
+        return;
+      }
+
+      // Check various attributes where price might be stored
+      const priceAmount = $el.attr('data-price-amount');
+      const dataPrice = $el.attr('data-price');
+      const content = $el.attr('content');
+      const text = $el.text();
+
+      let parsed: ParsedPrice | null = null;
+      // Context defaults to the selector and is replaced below once a price is parsed
+      let context = selector;
+
+      // Try data-price-amount first (Magento stores numeric value here)
+      if (priceAmount) {
+        const price = parseFloat(priceAmount);
+        if (!isNaN(price) && price > 0) {
+          // The attribute carries no currency, so look for a code or symbol in nearby text:
+          // the element itself, then its parent, then the closest .price-box ancestor.
+          // Falls back to USD when nothing is found.
+          let currency = 'USD';
+          const textSources = [text, $el.parent().text(), $el.closest('.price-box').text()];
+          for (const source of textSources) {
+            if (!source) continue;
+            // A currency code takes precedence over a symbol within the same source
+            const currencyCodeMatch = source.match(/\b(CHF|EUR|GBP|USD|CAD|AUD|JPY|INR)\b/i);
+            if (currencyCodeMatch) {
+              currency = currencyCodeMatch[1].toUpperCase();
+              break;
+            }
+            const symbolMatch = source.match(/([$€£¥₹])/);
+            if (symbolMatch) {
+              const symbolMap: Record<string, string> = { '$': 'USD', '€': 'EUR', '£': 'GBP', '¥': 'JPY', '₹': 'INR' };
+              currency = symbolMap[symbolMatch[1]] || 'USD';
+              break;
+            }
+          }
+          parsed = { price, currency };
+          context = `data-price-amount attribute`;
+        }
+      }
+
+      // Fallback: parse the first non-empty of content attribute, data-price attribute, text
+      if (!parsed) {
+        const priceStr = content || dataPrice || text;
+        parsed = parsePrice(priceStr);
+        if (parsed) {
+          // Show the user up to 50 characters of the element's text as context
+          context = text.trim().slice(0, 50);
+        }
+      }
+
+      if (parsed && parsed.price > 0 && !seen.has(parsed.price)) {
+        seen.add(parsed.price);
+        candidates.push({
+          price: parsed.price,
+          currency: parsed.currency,
+          method: 'generic-css',
+          context,
+          confidence: 0.6, // Generic CSS is less reliable
+        });
+      }
+    });
+
+    // Only take first few generic candidates to avoid noise
+    // (checked once per selector, so a single selector can still contribute more than 3)
+    if (candidates.length >= 3) break;
+  }
+
+  return candidates;
+}
+
 // Browser-based scraping for sites that block HTTP requests (e.g., Cloudflare)
 async function scrapeWithBrowser(url: string): Promise<string> {
  const browser = await puppeteer.launch({
@ -1059,6 +1273,287 @@ export async function scrapeProduct(url: string, userId?: number): Promise<Scrap
  return result;
 }

+/**
+ * Multi-strategy voting scraper with user review support.
+ * Runs all extraction methods, finds consensus, and flags ambiguous cases for user review.
+ */
+export async function scrapeProductWithVoting(
+  url: string,
+  userId?: number,
+  preferredMethod?: ExtractionMethod
+): Promise<ScrapedProductWithCandidates> {
+  const result: ScrapedProductWithCandidates = {
+    name: null,
+    price: null,
+    imageUrl: null,
+    url,
+    stockStatus: 'unknown',
+    aiStatus: null,
+    priceCandidates: [],
+    needsReview: false,
+  };
+
+  let html: string = '';
+
+  try {
+    let usedBrowser = false;
+
+    // Fetch HTML
+    try {
+      const response = await axios.get<string>(url, {
+        headers: {
+          'User-Agent':
+            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
+          Accept:
+            'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
+          'Accept-Language': 'en-US,en;q=0.9',
+          'Accept-Encoding': 'gzip, deflate, br',
+          'Cache-Control': 'no-cache',
+          Pragma: 'no-cache',
+          'Sec-Ch-Ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
+          'Sec-Ch-Ua-Mobile': '?0',
+          'Sec-Ch-Ua-Platform': '"Windows"',
+          'Sec-Fetch-Dest': 'document',
+          'Sec-Fetch-Mode': 'navigate',
+          'Sec-Fetch-Site': 'none',
+          'Sec-Fetch-User': '?1',
+          'Upgrade-Insecure-Requests': '1',
+        },
+        timeout: 20000,
+        maxRedirects: 5,
+      });
+      html = response.data;
+    } catch (axiosError) {
+      if (axiosError instanceof AxiosError && axiosError.response?.status === 403) {
+        console.log(`[Voting] HTTP blocked (403) for ${url}, using browser...`);
+        html = await scrapeWithBrowser(url);
+        usedBrowser = true;
+      } else {
+        throw axiosError;
+      }
+    }
+
+    let $ = load(html);
+
+    // Collect candidates from all methods
+    const allCandidates: PriceCandidate[] = [];
+
+    // 1. JSON-LD extraction (highest reliability)
+    const jsonLdCandidates = extractJsonLdCandidates($);
+    allCandidates.push(...jsonLdCandidates);
+    console.log(`[Voting] JSON-LD found ${jsonLdCandidates.length} candidates`);
+
+    // 2. Site-specific extraction
+    const siteResult = extractSiteSpecificCandidates($, url);
+    allCandidates.push(...siteResult.candidates);
+    if (siteResult.name) result.name = siteResult.name;
+    if (siteResult.imageUrl) result.imageUrl = siteResult.imageUrl;
+    if (siteResult.stockStatus !== 'unknown') result.stockStatus = siteResult.stockStatus;
+    console.log(`[Voting] Site-specific found ${siteResult.candidates.length} candidates`);
+
+    // 3. Generic CSS extraction
+    const genericCandidates = extractGenericCssCandidates($);
+    allCandidates.push(...genericCandidates);
+    console.log(`[Voting] Generic CSS found ${genericCandidates.length} candidates`);
+
+    // If no candidates found in static HTML, try browser rendering
+    if (allCandidates.length === 0 && !usedBrowser) {
+      console.log(`[Voting] No candidates in static HTML, trying browser...`);
+      try {
+        html = await scrapeWithBrowser(url);
+        usedBrowser = true;
+        $ = load(html);
+
+        // Re-run all extraction methods
+        allCandidates.push(...extractJsonLdCandidates($));
+        const browserSiteResult = extractSiteSpecificCandidates($, url);
+        allCandidates.push(...browserSiteResult.candidates);
+        if (!result.name && browserSiteResult.name) result.name = browserSiteResult.name;
+        if (!result.imageUrl && browserSiteResult.imageUrl) result.imageUrl = browserSiteResult.imageUrl;
+        if (result.stockStatus === 'unknown' && browserSiteResult.stockStatus !== 'unknown') {
+          result.stockStatus = browserSiteResult.stockStatus;
+        }
+        allCandidates.push(...extractGenericCssCandidates($));
+        console.log(`[Voting] Browser found ${allCandidates.length} total candidates`);
+      } catch (browserError) {
+        console.error(`[Voting] Browser fallback failed:`, browserError);
+      }
+    }
+
+    // Fill in missing metadata
+    if (!result.name) {
+      result.name = extractGenericName($) || $('meta[property="og:title"]').attr('content') || null;
+    }
+    if (!result.imageUrl) {
+      result.imageUrl = extractGenericImage($, url) || $('meta[property="og:image"]').attr('content') || null;
+    }
+    if (result.stockStatus === 'unknown') {
+      result.stockStatus = extractGenericStockStatus($);
+    }
+
+    // Store all candidates
+    result.priceCandidates = allCandidates;
+
+    // If user has a preferred method, try to use it
+    if (preferredMethod && allCandidates.length > 0) {
+      const preferredCandidate = allCandidates.find(c => c.method === preferredMethod);
+      if (preferredCandidate) {
+        console.log(`[Voting] Using preferred method ${preferredMethod}: ${preferredCandidate.price}`);
+        result.price = { price: preferredCandidate.price, currency: preferredCandidate.currency };
+        result.selectedMethod = preferredMethod;
+        return result;
+      }
+    }
+
+    // Find consensus
+    const { price: consensusPrice, hasConsensus, groups } = findPriceConsensus(allCandidates);
+    console.log(`[Voting] Consensus: ${hasConsensus}, Groups: ${groups.length}, Winner: ${consensusPrice?.price}`);
+
+    if (hasConsensus && consensusPrice) {
+      // Clear consensus - use the winning price
+      result.price = { price: consensusPrice.price, currency: consensusPrice.currency };
+      result.selectedMethod = consensusPrice.method;
+      console.log(`[Voting] Consensus price: ${consensusPrice.price} via ${consensusPrice.method}`);
+    } else if (allCandidates.length > 0) {
+      // No consensus - try AI arbitration if available
+      if (userId && html) {
+        console.log(`[Voting] No consensus, trying AI arbitration...`);
+        try {
+          const { tryAIArbitration } = await import('./ai-extractor');
+          const aiResult = await tryAIArbitration(url, html, allCandidates, userId);
+
+          if (aiResult && aiResult.selectedPrice) {
+            console.log(`[Voting] AI selected price: ${aiResult.selectedPrice.price} (reason: ${aiResult.reason})`);
+            result.price = { price: aiResult.selectedPrice.price, currency: aiResult.selectedPrice.currency };
+            result.selectedMethod = aiResult.selectedPrice.method;
+            result.aiStatus = 'verified';
+
+            // Add AI as a candidate for transparency
+            if (!allCandidates.find(c => c.method === 'ai')) {
+              result.priceCandidates.push({
+                price: aiResult.selectedPrice.price,
+                currency: aiResult.selectedPrice.currency,
+                method: 'ai',
+                context: `AI arbitration: ${aiResult.reason}`,
+                confidence: aiResult.confidence || 0.8,
+              });
+            }
+          } else {
+            // AI couldn't decide either - flag for user review
+            console.log(`[Voting] AI couldn't decide, flagging for user review`);
+            result.needsReview = true;
+            // Use the most confident candidate as default
+            const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0];
+            result.price = { price: bestCandidate.price, currency: bestCandidate.currency };
+            result.selectedMethod = bestCandidate.method;
+          }
+        } catch (aiError) {
+          console.error(`[Voting] AI arbitration failed:`, aiError);
+          // Fall back to flagging for user review
+          result.needsReview = true;
+          const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0];
+          result.price = { price: bestCandidate.price, currency: bestCandidate.currency };
+          result.selectedMethod = bestCandidate.method;
+        }
+      } else {
+        // No AI available - flag for user review if multiple prices differ significantly
+        if (groups.length > 1) {
+          result.needsReview = true;
+          console.log(`[Voting] Multiple price groups found, flagging for user review`);
+        }
+        // Use the most confident candidate as default
+        const bestCandidate = allCandidates.sort((a, b) => b.confidence - a.confidence)[0];
+        result.price = { price: bestCandidate.price, currency: bestCandidate.currency };
+        result.selectedMethod = bestCandidate.method;
+      }
+    } else {
+      // No candidates at all - try pure AI extraction
+      if (userId && html) {
+        console.log(`[Voting] No candidates found, trying AI extraction...`);
+        try {
+          const { tryAIExtraction } = await import('./ai-extractor');
+          const aiResult = await tryAIExtraction(url, html, userId);
+
+          if (aiResult && aiResult.price && aiResult.confidence > 0.5) {
+            console.log(`[Voting] AI extracted price: ${aiResult.price.price}`);
+            result.price = aiResult.price;
+            result.selectedMethod = 'ai';
+            result.priceCandidates.push({
+              price: aiResult.price.price,
+              currency: aiResult.price.currency,
+              method: 'ai',
+              context: 'AI extraction (no other methods found price)',
+              confidence: aiResult.confidence,
+            });
+            if (!result.name && aiResult.name) result.name = aiResult.name;
+            if (!result.imageUrl && aiResult.imageUrl) result.imageUrl = aiResult.imageUrl;
+            if (result.stockStatus === 'unknown' && aiResult.stockStatus !== 'unknown') {
+              result.stockStatus = aiResult.stockStatus;
+            }
+          }
+        } catch (aiError) {
+          console.error(`[Voting] AI extraction failed:`, aiError);
+        }
+      }
+    }
+
+    // If we have a price but AI is available, verify it
+    if (result.price && userId && html && !result.aiStatus) {
+      try {
+        const { tryAIVerification } = await import('./ai-extractor');
+        const verifyResult = await tryAIVerification(
+          url,
+          html,
+          result.price.price,
+          result.price.currency,
+          userId
+        );
+
+        if (verifyResult) {
+          if (verifyResult.isCorrect) {
+            result.aiStatus = 'verified';
+          } else if (verifyResult.suggestedPrice && verifyResult.confidence > 0.7) {
+            // AI suggests a different price - this might indicate we need review
+            const existingCandidate = allCandidates.find(c =>
+              pricesMatch(c.price, verifyResult.suggestedPrice!.price)
+            );
+            if (existingCandidate) {
+              // AI agrees with one of our candidates - use that
+              result.price = verifyResult.suggestedPrice;
+              result.selectedMethod = existingCandidate.method;
+              result.aiStatus = 'corrected';
+            } else if (!result.needsReview) {
+              // AI suggests a price we didn't find - flag for review
+              result.needsReview = true;
+              result.priceCandidates.push({
+                price: verifyResult.suggestedPrice.price,
+                currency: verifyResult.suggestedPrice.currency,
+                method: 'ai',
+                context: `AI suggestion: ${verifyResult.reason}`,
+                confidence: verifyResult.confidence,
+              });
+            }
+          }
+
+          // Update stock status from AI
+          if (verifyResult.stockStatus && verifyResult.stockStatus !== 'unknown') {
+            if (result.stockStatus === 'unknown' || verifyResult.stockStatus === 'out_of_stock') {
+              result.stockStatus = verifyResult.stockStatus;
+            }
+          }
+        }
+      } catch (verifyError) {
+        console.error(`[Voting] AI verification failed:`, verifyError);
+      }
+    }
+
+  } catch (error) {
+    console.error(`[Voting] Error scraping ${url}:`, error);
+  }
+
+  return result;
+}
+
 interface JsonLdProduct {
  '@type'?: string;
  '@graph'?: JsonLdProduct[];