diff --git a/backend/src/index.ts b/backend/src/index.ts index bed4088..03e2521 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -58,6 +58,9 @@ async function runMigrations() { IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ntfy_enabled') THEN ALTER TABLE users ADD COLUMN ntfy_enabled BOOLEAN DEFAULT true; END IF; + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ai_verification_enabled') THEN + ALTER TABLE users ADD COLUMN ai_verification_enabled BOOLEAN DEFAULT false; + END IF; END $$; `); diff --git a/backend/src/models/index.ts b/backend/src/models/index.ts index 211b799..3e8616c 100644 --- a/backend/src/models/index.ts +++ b/backend/src/models/index.ts @@ -36,6 +36,7 @@ export interface NotificationSettings { export interface AISettings { ai_enabled: boolean; + ai_verification_enabled: boolean; ai_provider: 'anthropic' | 'openai' | 'ollama' | null; anthropic_api_key: string | null; openai_api_key: string | null; @@ -209,7 +210,9 @@ export const userQueries = { getAISettings: async (id: number): Promise => { const result = await pool.query( - 'SELECT ai_enabled, ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model FROM users WHERE id = $1', + `SELECT ai_enabled, COALESCE(ai_verification_enabled, false) as ai_verification_enabled, + ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model + FROM users WHERE id = $1`, [id] ); return result.rows[0] || null; @@ -227,6 +230,10 @@ export const userQueries = { fields.push(`ai_enabled = $${paramIndex++}`); values.push(settings.ai_enabled); } + if (settings.ai_verification_enabled !== undefined) { + fields.push(`ai_verification_enabled = $${paramIndex++}`); + values.push(settings.ai_verification_enabled); + } if (settings.ai_provider !== undefined) { fields.push(`ai_provider = $${paramIndex++}`); values.push(settings.ai_provider); @@ -253,7 +260,8 @@ export const userQueries = { values.push(id.toString()); const result = await pool.query( `UPDATE users SET ${fields.join(', ')} WHERE id = $${paramIndex} - RETURNING ai_enabled, ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model`, + RETURNING ai_enabled, COALESCE(ai_verification_enabled, false) as ai_verification_enabled, + ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model`, values ); return result.rows[0] || null; diff --git a/backend/src/services/ai-extractor.ts b/backend/src/services/ai-extractor.ts index 6e2cea2..1b383b9 100644 --- a/backend/src/services/ai-extractor.ts +++ b/backend/src/services/ai-extractor.ts @@ -14,6 +14,39 @@ export interface AIExtractionResult { confidence: number; } +export interface AIVerificationResult { + isCorrect: boolean; + confidence: number; + suggestedPrice: ParsedPrice | null; + reason: string; +} + +const VERIFICATION_PROMPT = `You are a price verification assistant. I scraped a product page and found a price. Please verify if this price is correct. + +Scraped Price: $SCRAPED_PRICE$ $CURRENCY$ + +Analyze the HTML content below and determine: +1. Is the scraped price the correct CURRENT/SALE price for the main product? +2. If not, what is the correct price? + +Common issues to watch for: +- Scraped price might be a "savings" amount (e.g., "Save $189.99") +- Scraped price might be from a bundle/combo deal section +- Scraped price might be shipping cost or add-on price +- Scraped price might be the original/crossed-out price instead of the sale price + +Return a JSON object with: +- isCorrect: boolean - true if the scraped price is correct +- confidence: number from 0 to 1 +- suggestedPrice: the correct price as a number (or null if scraped price is correct) +- suggestedCurrency: currency code if suggesting a different price +- reason: brief explanation of your decision + +Only return valid JSON, no explanation text outside the JSON. + +HTML Content: +`; + const EXTRACTION_PROMPT = `You are a price extraction assistant. Analyze the following HTML content from a product page and extract the product information. Return a JSON object with these fields: @@ -180,6 +213,153 @@ async function extractWithOllama( return parseAIResponse(content); } +// Verification functions for each provider +async function verifyWithAnthropic( + html: string, + scrapedPrice: number, + currency: string, + apiKey: string +): Promise { + const anthropic = new Anthropic({ apiKey }); + + const preparedHtml = prepareHtmlForAI(html); + const prompt = VERIFICATION_PROMPT + .replace('$SCRAPED_PRICE$', scrapedPrice.toString()) + .replace('$CURRENCY$', currency) + preparedHtml; + + const response = await anthropic.messages.create({ + model: 'claude-3-haiku-20240307', + max_tokens: 512, + messages: [{ role: 'user', content: prompt }], + }); + + const content = response.content[0]; + if (content.type !== 'text') { + throw new Error('Unexpected response type from Anthropic'); + } + + return parseVerificationResponse(content.text, scrapedPrice, currency); +} + +async function verifyWithOpenAI( + html: string, + scrapedPrice: number, + currency: string, + apiKey: string +): Promise { + const openai = new OpenAI({ apiKey }); + + const preparedHtml = prepareHtmlForAI(html); + const prompt = VERIFICATION_PROMPT + .replace('$SCRAPED_PRICE$', scrapedPrice.toString()) + .replace('$CURRENCY$', currency) + preparedHtml; + + const response = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + max_tokens: 512, + messages: [{ role: 'user', content: prompt }], + }); + + const content = response.choices[0]?.message?.content; + if (!content) { + throw new Error('No response from OpenAI'); + } + + return parseVerificationResponse(content, scrapedPrice, currency); +} + +async function verifyWithOllama( + html: string, + scrapedPrice: number, + currency: string, + baseUrl: string, + model: string +): Promise { + const preparedHtml = prepareHtmlForAI(html); + const prompt = VERIFICATION_PROMPT + .replace('$SCRAPED_PRICE$', scrapedPrice.toString()) + .replace('$CURRENCY$', currency) + preparedHtml; + + const response = await axios.post( + `${baseUrl}/api/chat`, + { + model: model, + messages: [{ role: 'user', content: prompt }], + stream: false, + }, + { + headers: { 'Content-Type': 'application/json' }, + timeout: 120000, + } + ); + + const content = response.data?.message?.content; + if (!content) { + throw new Error('No response from Ollama'); + } + + return parseVerificationResponse(content, scrapedPrice, currency); +} + +function parseVerificationResponse( + responseText: string, + originalPrice: number, + originalCurrency: string +): AIVerificationResult { + console.log(`[AI Verify] Raw response: ${responseText.substring(0, 500)}...`); + + // Default result if parsing fails + const defaultResult: AIVerificationResult = { + isCorrect: true, // Assume correct if we can't parse + confidence: 0.5, + suggestedPrice: null, + reason: 'Could not parse AI response', + }; + + let jsonStr = responseText.trim(); + + // Handle markdown code blocks + const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/); + if (jsonMatch) { + jsonStr = jsonMatch[1].trim(); + } + + // Try to find JSON object + const objectMatch = jsonStr.match(/\{[\s\S]*\}/); + if (objectMatch) { + jsonStr = objectMatch[0]; + } + + try { + const data = JSON.parse(jsonStr); + console.log(`[AI Verify] Parsed:`, JSON.stringify(data, null, 2)); + + let suggestedPrice: ParsedPrice | null = null; + if (!data.isCorrect && data.suggestedPrice !== null && data.suggestedPrice !== undefined) { + const priceNum = typeof data.suggestedPrice === 'string' + ? parseFloat(data.suggestedPrice.replace(/[^0-9.]/g, '')) + : data.suggestedPrice; + + if (!isNaN(priceNum) && priceNum > 0) { + suggestedPrice = { + price: priceNum, + currency: data.suggestedCurrency || originalCurrency, + }; + } + } + + return { + isCorrect: data.isCorrect ?? true, + confidence: data.confidence ?? 0.5, + suggestedPrice, + reason: data.reason || 'No reason provided', + }; + } catch (error) { + console.error('[AI Verify] Failed to parse response:', responseText); + return defaultResult; + } +} + function parseAIResponse(responseText: string): AIExtractionResult { console.log(`[AI] Raw response: ${responseText.substring(0, 500)}...`); @@ -307,3 +487,40 @@ export async function tryAIExtraction( return null; } } + +// Export for use in scraper to verify scraped prices +export async function tryAIVerification( + url: string, + html: string, + scrapedPrice: number, + currency: string, + userId: number +): Promise { + try { + const { userQueries } = await import('../models'); + const settings = await userQueries.getAISettings(userId); + + // Check if AI verification is enabled (separate from AI extraction fallback) + if (!settings?.ai_verification_enabled) { + return null; + } + + // Need a configured provider + if (settings.ai_provider === 'anthropic' && settings.anthropic_api_key) { + console.log(`[AI Verify] Using Anthropic to verify $${scrapedPrice} for ${url}`); + return await verifyWithAnthropic(html, scrapedPrice, currency, settings.anthropic_api_key); + } else if (settings.ai_provider === 'openai' && settings.openai_api_key) { + console.log(`[AI Verify] Using OpenAI to verify $${scrapedPrice} for ${url}`); + return await verifyWithOpenAI(html, scrapedPrice, currency, settings.openai_api_key); + } else if (settings.ai_provider === 'ollama' && settings.ollama_base_url && settings.ollama_model) { + console.log(`[AI Verify] Using Ollama to verify $${scrapedPrice} for ${url}`); + return await verifyWithOllama(html, scrapedPrice, currency, settings.ollama_base_url, settings.ollama_model); + } + + console.log(`[AI Verify] Verification enabled but no provider configured`); + return null; + } catch (error) { + console.error(`[AI Verify] Verification failed for ${url}:`, error); + return null; + } +} diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index 2c6b097..6727114 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -818,7 +818,34 @@ export async function scrapeProduct(url: string, userId?: number): Promise 0.6) { + console.log(`[AI Verify] Price correction: $${result.price.price} -> $${verifyResult.suggestedPrice.price} (${verifyResult.reason})`); + result.price = verifyResult.suggestedPrice; + } else { + console.log(`[AI Verify] Price might be incorrect but no confident suggestion: ${verifyResult.reason}`); + } + } + } catch (verifyError) { + console.error(`[AI Verify] Verification failed for ${url}:`, verifyError); + } + } + + // If we still don't have a price and userId is provided, try AI extraction as fallback if (!result.price && userId && html) { try { const { tryAIExtraction } = await import('./ai-extractor'); diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index f098c00..6b9be32 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -196,6 +196,7 @@ export const settingsApi = { updateAI: (data: { ai_enabled?: boolean; + ai_verification_enabled?: boolean; ai_provider?: 'anthropic' | 'openai' | 'ollama' | null; anthropic_api_key?: string | null; openai_api_key?: string | null; @@ -213,6 +214,7 @@ export const settingsApi = { // AI Settings types export interface AISettings { ai_enabled: boolean; + ai_verification_enabled: boolean; ai_provider: 'anthropic' | 'openai' | 'ollama' | null; anthropic_api_key: string | null; openai_api_key: string | null; diff --git a/frontend/src/pages/Settings.tsx b/frontend/src/pages/Settings.tsx index 7230841..a2e07de 100644 --- a/frontend/src/pages/Settings.tsx +++ b/frontend/src/pages/Settings.tsx @@ -47,6 +47,7 @@ export default function Settings() { // AI state const [aiSettings, setAISettings] = useState(null); const [aiEnabled, setAIEnabled] = useState(false); + const [aiVerificationEnabled, setAIVerificationEnabled] = useState(false); const [aiProvider, setAIProvider] = useState<'anthropic' | 'openai' | 'ollama'>('anthropic'); const [anthropicApiKey, setAnthropicApiKey] = useState(''); const [openaiApiKey, setOpenaiApiKey] = useState(''); @@ -97,6 +98,7 @@ export default function Settings() { // Populate AI fields with actual values setAISettings(aiRes.data); setAIEnabled(aiRes.data.ai_enabled); + setAIVerificationEnabled(aiRes.data.ai_verification_enabled ?? false); if (aiRes.data.ai_provider) { setAIProvider(aiRes.data.ai_provider); } @@ -353,6 +355,7 @@ export default function Settings() { try { const response = await settingsApi.updateAI({ ai_enabled: aiEnabled, + ai_verification_enabled: aiVerificationEnabled, ai_provider: aiProvider, anthropic_api_key: anthropicApiKey || undefined, openai_api_key: openaiApiKey || undefined, @@ -360,6 +363,7 @@ export default function Settings() { ollama_model: aiProvider === 'ollama' ? ollamaModel || null : undefined, }); setAISettings(response.data); + setAIVerificationEnabled(response.data.ai_verification_enabled ?? false); setAnthropicApiKey(''); setOpenaiApiKey(''); setSuccess('AI settings saved successfully'); @@ -1287,7 +1291,20 @@ export default function Settings() { /> - {aiEnabled && ( +
+
+ Enable AI Verification + + Verify all scraped prices with AI to ensure accuracy + +
+
+ + {(aiEnabled || aiVerificationEnabled) && ( <>