feat: Multi-strategy price voting system with user selection

- Add multi-strategy voting: runs JSON-LD, site-specific, generic CSS, and AI extraction methods in parallel
- Implement consensus voting to select the correct price when methods agree
- Add AI arbitration when extraction methods disagree
- Add PriceSelectionModal for users to select correct price when ambiguous
- Store preferred extraction method per product for faster re-checks
- Add database columns for preferred_extraction_method and needs_price_review

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-15 10:52:36 +02:00 · 2026-01-24 14:45:51 -05:00 · 2026-01-24 14:45:51 -05:00 · 4fd04cd160
commit 4fd04cd160
parent 40c45b49c8
10 changed files with 1259 additions and 12 deletions
--- a/backend/src/services/ai-extractor.ts
+++ b/backend/src/services/ai-extractor.ts
@ -4,7 +4,7 @@ import axios from 'axios';
 import { load } from 'cheerio';
 import { AISettings } from '../models';
 import { ParsedPrice } from '../utils/priceParser';
-import { StockStatus } from './scraper';
+import { StockStatus, PriceCandidate } from './scraper';

 export interface AIExtractionResult {
  name: string | null;
@ -548,3 +548,211 @@ export async function tryAIVerification(
    return null;
  }
 }
+
+// Arbitration prompt for when multiple extraction methods disagree.
+// `$CANDIDATES$` is a placeholder that the arbitrate* functions replace with
+// the numbered candidate list; the prepared page HTML is then appended after
+// the trailing "HTML Content:" line. The model is asked to answer with a JSON
+// object holding `selectedIndex`, `confidence`, and `reason`.
+const ARBITRATION_PROMPT = `You are a price arbitration assistant. Multiple price extraction methods found different prices for the same product. Help determine the correct price.
+
+Found prices:
+$CANDIDATES$
+
+Analyze the HTML content below and determine which price is the correct CURRENT selling price for the main product.
+
+Consider:
+- JSON-LD structured data is usually highly reliable (schema.org standard)
+- Site-specific extractors are well-tested for major retailers
+- Generic CSS selectors might catch wrong prices (shipping, savings, bundles, etc.)
+- Look for the price that appears in the main product display area
+- Ignore crossed-out/original prices, shipping costs, subscription prices, or bundle prices
+
+Return a JSON object with:
+- selectedIndex: the 0-based index of the correct price from the list above
+- confidence: your confidence from 0 to 1
+- reason: brief explanation of why this price is correct
+
+Only return valid JSON, no explanation text outside the JSON.
+
+HTML Content:
+`;
+
+/** Outcome of asking an AI provider to choose between disagreeing price candidates. */
+export interface AIArbitrationResult {
+  /** The candidate the model picked, or `null` when its reply could not be parsed into a valid choice. */
+  selectedPrice: PriceCandidate | null;
+  /** Confidence reported by the model; the parser uses 0.7 when the reply omits it and 0 when parsing fails. */
+  confidence: number;
+  /** Explanation from the model, or a fallback message supplied by the parser. */
+  reason: string;
+}
+
+/**
+ * Asks Anthropic to pick the correct price among disagreeing candidates.
+ *
+ * @param html - Page HTML; it is passed through `prepareHtmlForAI` before being sent.
+ * @param candidates - Candidate prices; each one is listed to the model under its array index.
+ * @param apiKey - Anthropic API key.
+ * @returns The arbitration result parsed from the model's reply.
+ * @throws Error when the first content block of the reply is missing or is not text.
+ */
+async function arbitrateWithAnthropic(
+  html: string,
+  candidates: PriceCandidate[],
+  apiKey: string
+): Promise<AIArbitrationResult> {
+  const anthropic = new Anthropic({ apiKey });
+
+  const candidatesList = candidates
+    .map(
+      (c, i) =>
+        `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
+    )
+    .join('\n');
+
+  const preparedHtml = prepareHtmlForAI(html);
+  // Use a replacer function rather than a replacement string: in a string,
+  // sequences such as "$$", "$&", "$`" and "$'" inside the candidate text
+  // (currency symbols, scraped context) are treated as special replacement
+  // patterns and would corrupt the prompt.
+  const prompt =
+    ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;
+
+  const response = await anthropic.messages.create({
+    model: 'claude-3-haiku-20240307',
+    max_tokens: 512,
+    messages: [{ role: 'user', content: prompt }],
+  });
+
+  // An empty content array would otherwise surface as a TypeError on `.type`.
+  const content = response.content[0];
+  if (!content || content.type !== 'text') {
+    throw new Error('Unexpected response type from Anthropic');
+  }
+
+  return parseArbitrationResponse(content.text, candidates);
+}
+
+/**
+ * Asks OpenAI to pick the correct price among disagreeing candidates.
+ *
+ * @param html - Page HTML; it is passed through `prepareHtmlForAI` before being sent.
+ * @param candidates - Candidate prices; each one is listed to the model under its array index.
+ * @param apiKey - OpenAI API key.
+ * @returns The arbitration result parsed from the model's reply.
+ * @throws Error when the first choice carries no message content.
+ */
+async function arbitrateWithOpenAI(
+  html: string,
+  candidates: PriceCandidate[],
+  apiKey: string
+): Promise<AIArbitrationResult> {
+  const openai = new OpenAI({ apiKey });
+
+  const candidatesList = candidates
+    .map(
+      (c, i) =>
+        `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
+    )
+    .join('\n');
+
+  const preparedHtml = prepareHtmlForAI(html);
+  // Use a replacer function rather than a replacement string: in a string,
+  // sequences such as "$$", "$&", "$`" and "$'" inside the candidate text
+  // (currency symbols, scraped context) are treated as special replacement
+  // patterns and would corrupt the prompt.
+  const prompt =
+    ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;
+
+  const response = await openai.chat.completions.create({
+    model: 'gpt-4o-mini',
+    max_tokens: 512,
+    messages: [{ role: 'user', content: prompt }],
+  });
+
+  const content = response.choices[0]?.message?.content;
+  if (!content) {
+    throw new Error('No response from OpenAI');
+  }
+
+  return parseArbitrationResponse(content, candidates);
+}
+
+/**
+ * Asks an Ollama server to pick the correct price among disagreeing candidates.
+ *
+ * @param html - Page HTML; it is passed through `prepareHtmlForAI` before being sent.
+ * @param candidates - Candidate prices; each one is listed to the model under its array index.
+ * @param baseUrl - Base URL of the Ollama server; `/api/chat` is appended to it.
+ * @param model - Name of the Ollama model to query.
+ * @returns The arbitration result parsed from the model's reply.
+ * @throws Error when the reply carries no textual message content.
+ */
+async function arbitrateWithOllama(
+  html: string,
+  candidates: PriceCandidate[],
+  baseUrl: string,
+  model: string
+): Promise<AIArbitrationResult> {
+  const candidatesList = candidates
+    .map(
+      (c, i) =>
+        `${i}. ${c.price} ${c.currency} (method: ${c.method}, context: ${c.context || 'none'})`
+    )
+    .join('\n');
+
+  const preparedHtml = prepareHtmlForAI(html);
+  // Use a replacer function rather than a replacement string: in a string,
+  // sequences such as "$$", "$&", "$`" and "$'" inside the candidate text
+  // (currency symbols, scraped context) are treated as special replacement
+  // patterns and would corrupt the prompt.
+  const prompt =
+    ARBITRATION_PROMPT.replace('$CANDIDATES$', () => candidatesList) + preparedHtml;
+
+  // Strip trailing slashes so a base URL ending in "/" does not yield "//api/chat".
+  const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;
+
+  const response = await axios.post(
+    endpoint,
+    {
+      model,
+      messages: [{ role: 'user', content: prompt }],
+      stream: false,
+    },
+    {
+      headers: { 'Content-Type': 'application/json' },
+      timeout: 120000,
+    }
+  );
+
+  // The parser calls string methods on the reply, so reject non-string payloads here.
+  const content: unknown = response.data?.message?.content;
+  if (typeof content !== 'string' || content === '') {
+    throw new Error('No response from Ollama');
+  }
+
+  return parseArbitrationResponse(content, candidates);
+}
+
+/**
+ * Turns a model's raw arbitration reply into an `AIArbitrationResult`.
+ *
+ * The reply may be bare JSON, JSON wrapped in a markdown code fence, or JSON
+ * surrounded by other text; the outermost `{ ... }` span is what gets parsed.
+ *
+ * @param responseText - Raw text returned by the AI provider.
+ * @param candidates - The candidates that were listed to the model, in the same order.
+ * @returns The chosen candidate with confidence and reason, or a result with
+ *   `selectedPrice: null` and `confidence: 0` when the reply is not valid JSON
+ *   or does not name a valid candidate index.
+ */
+function parseArbitrationResponse(
+  responseText: string,
+  candidates: PriceCandidate[]
+): AIArbitrationResult {
+  console.log(`[AI Arbitrate] Raw response: ${responseText.substring(0, 500)}...`);
+
+  const defaultResult: AIArbitrationResult = {
+    selectedPrice: null,
+    confidence: 0,
+    reason: 'Could not parse AI response',
+  };
+
+  let jsonStr = responseText.trim();
+
+  // Handle markdown code blocks
+  const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (jsonMatch) {
+    jsonStr = jsonMatch[1].trim();
+  }
+
+  // Try to find JSON object
+  const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
+  if (objectMatch) {
+    jsonStr = objectMatch[0];
+  }
+
+  try {
+    const data: unknown = JSON.parse(jsonStr);
+    console.log(`[AI Arbitrate] Parsed:`, JSON.stringify(data, null, 2));
+
+    // JSON.parse can yield null or a primitive; only an object can carry the fields.
+    if (typeof data !== 'object' || data === null) {
+      return defaultResult;
+    }
+    const { selectedIndex, confidence, reason } = data as Record<string, unknown>;
+
+    // Require an integer: a fractional index such as 0.5 passes a plain range
+    // check but indexes to `undefined`, which is not a valid selectedPrice.
+    if (
+      typeof selectedIndex !== 'number' ||
+      !Number.isInteger(selectedIndex) ||
+      selectedIndex < 0 ||
+      selectedIndex >= candidates.length
+    ) {
+      return defaultResult;
+    }
+
+    // The model's output is untrusted: keep confidence numeric and inside
+    // [0, 1], falling back to 0.7 when it is missing or not a finite number.
+    const safeConfidence =
+      typeof confidence === 'number' && Number.isFinite(confidence)
+        ? Math.min(1, Math.max(0, confidence))
+        : 0.7;
+
+    return {
+      selectedPrice: candidates[selectedIndex],
+      confidence: safeConfidence,
+      reason: typeof reason === 'string' && reason !== '' ? reason : 'AI selected this price',
+    };
+  } catch (error) {
+    console.error('[AI Arbitrate] Failed to parse response:', responseText);
+    return defaultResult;
+  }
+}
+
+/**
+ * Arbitrates between disagreeing price candidates using the user's configured
+ * AI provider. Exported for use in the voting scraper.
+ *
+ * @param url - Product URL; used only in log messages.
+ * @param html - Page HTML handed to the provider-specific arbitration function.
+ * @param candidates - Candidate prices found by the extraction methods.
+ * @param userId - User whose AI settings are loaded.
+ * @returns The provider's arbitration result, or `null` when there are fewer
+ *   than two candidates, AI is not enabled for the user, no provider is fully
+ *   configured, or any step throws (the error is logged, never rethrown).
+ */
+export async function tryAIArbitration(
+  url: string,
+  html: string,
+  candidates: PriceCandidate[],
+  userId: number
+): Promise<AIArbitrationResult | null> {
+  // Need at least 2 candidates to arbitrate; check first so that a trivial
+  // call never pays for the settings lookup.
+  if (candidates.length < 2) {
+    return null;
+  }
+
+  try {
+    // Loaded lazily at call time (presumably to avoid a circular import — confirm).
+    const { userQueries } = await import('../models');
+    const settings = await userQueries.getAISettings(userId);
+
+    // Need AI enabled for arbitration
+    if (!settings || (!settings.ai_enabled && !settings.ai_verification_enabled)) {
+      return null;
+    }
+
+    // Use the configured provider
+    if (settings.ai_provider === 'anthropic' && settings.anthropic_api_key) {
+      console.log(`[AI Arbitrate] Using Anthropic to arbitrate ${candidates.length} prices for ${url}`);
+      return await arbitrateWithAnthropic(html, candidates, settings.anthropic_api_key);
+    }
+    if (settings.ai_provider === 'openai' && settings.openai_api_key) {
+      console.log(`[AI Arbitrate] Using OpenAI to arbitrate ${candidates.length} prices for ${url}`);
+      return await arbitrateWithOpenAI(html, candidates, settings.openai_api_key);
+    }
+    if (settings.ai_provider === 'ollama' && settings.ollama_base_url && settings.ollama_model) {
+      console.log(`[AI Arbitrate] Using Ollama to arbitrate ${candidates.length} prices for ${url}`);
+      return await arbitrateWithOllama(html, candidates, settings.ollama_base_url, settings.ollama_model);
+    }
+
+    console.log(`[AI Arbitrate] No provider configured`);
+    return null;
+  } catch (error) {
+    // Arbitration is best-effort: log and let the caller fall back.
+    console.error(`[AI Arbitrate] Arbitration failed for ${url}:`, error);
+    return null;
+  }
+}