Add AI price verification feature

When enabled, AI verifies every scraped price to ensure accuracy.
This catches cases where the scraper picks up a "savings" amount instead of the actual price.

- Add ai_verification_enabled column to users table
- Create verification prompt and functions for Anthropic, OpenAI, Ollama
- Integrate verification step into scraper after traditional scraping
- Add verification toggle to Settings page (separate from AI extraction)
- AI verification is independent of AI extraction fallback

Flow: Traditional scraping -> AI verification (if enabled) -> AI extraction fallback (if no price found)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-23 09:24:17 -05:00
parent 7afa3ccec3
commit dc4e7b9665
6 changed files with 278 additions and 4 deletions

View file

@ -58,6 +58,9 @@ async function runMigrations() {
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ntfy_enabled') THEN
ALTER TABLE users ADD COLUMN ntfy_enabled BOOLEAN DEFAULT true;
END IF;
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ai_verification_enabled') THEN
ALTER TABLE users ADD COLUMN ai_verification_enabled BOOLEAN DEFAULT false;
END IF;
END $$;
`);

View file

@ -36,6 +36,7 @@ export interface NotificationSettings {
export interface AISettings {
ai_enabled: boolean;
ai_verification_enabled: boolean;
ai_provider: 'anthropic' | 'openai' | 'ollama' | null;
anthropic_api_key: string | null;
openai_api_key: string | null;
@ -209,7 +210,9 @@ export const userQueries = {
getAISettings: async (id: number): Promise<AISettings | null> => {
const result = await pool.query(
'SELECT ai_enabled, ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model FROM users WHERE id = $1',
`SELECT ai_enabled, COALESCE(ai_verification_enabled, false) as ai_verification_enabled,
ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model
FROM users WHERE id = $1`,
[id]
);
return result.rows[0] || null;
@ -227,6 +230,10 @@ export const userQueries = {
fields.push(`ai_enabled = $${paramIndex++}`);
values.push(settings.ai_enabled);
}
if (settings.ai_verification_enabled !== undefined) {
fields.push(`ai_verification_enabled = $${paramIndex++}`);
values.push(settings.ai_verification_enabled);
}
if (settings.ai_provider !== undefined) {
fields.push(`ai_provider = $${paramIndex++}`);
values.push(settings.ai_provider);
@ -253,7 +260,8 @@ export const userQueries = {
values.push(id.toString());
const result = await pool.query(
`UPDATE users SET ${fields.join(', ')} WHERE id = $${paramIndex}
RETURNING ai_enabled, ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model`,
RETURNING ai_enabled, COALESCE(ai_verification_enabled, false) as ai_verification_enabled,
ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model`,
values
);
return result.rows[0] || null;

View file

@ -14,6 +14,39 @@ export interface AIExtractionResult {
confidence: number;
}
/**
 * Verdict produced when an AI provider is asked to double-check a scraped price.
 */
export interface AIVerificationResult {
/** True when the scraped price is considered correct. */
isCorrect: boolean;
/** Confidence reported by the model; the verification prompt asks for a number from 0 to 1. */
confidence: number;
/** Replacement price proposed by the model, or null when no replacement is suggested. */
suggestedPrice: ParsedPrice | null;
/** Short explanation of the decision. */
reason: string;
}
// Prompt template for price verification.
// The literal markers $SCRAPED_PRICE$ and $CURRENCY$ are placeholders that the
// verifyWith* functions substitute before sending; they are NOT template-literal
// interpolations. The prepared page HTML is appended directly after the final
// "HTML Content:" line, so the template must keep ending with that line.
const VERIFICATION_PROMPT = `You are a price verification assistant. I scraped a product page and found a price. Please verify if this price is correct.
Scraped Price: $SCRAPED_PRICE$ $CURRENCY$
Analyze the HTML content below and determine:
1. Is the scraped price the correct CURRENT/SALE price for the main product?
2. If not, what is the correct price?
Common issues to watch for:
- Scraped price might be a "savings" amount (e.g., "Save $189.99")
- Scraped price might be from a bundle/combo deal section
- Scraped price might be shipping cost or add-on price
- Scraped price might be the original/crossed-out price instead of the sale price
Return a JSON object with:
- isCorrect: boolean - true if the scraped price is correct
- confidence: number from 0 to 1
- suggestedPrice: the correct price as a number (or null if scraped price is correct)
- suggestedCurrency: currency code if suggesting a different price
- reason: brief explanation of your decision
Only return valid JSON, no explanation text outside the JSON.
HTML Content:
`;
const EXTRACTION_PROMPT = `You are a price extraction assistant. Analyze the following HTML content from a product page and extract the product information.
Return a JSON object with these fields:
@ -180,6 +213,153 @@ async function extractWithOllama(
return parseAIResponse(content);
}
// Verification functions for each provider
/**
 * Asks Anthropic to verify a scraped price against the page HTML.
 *
 * @param html - Page HTML; passed through `prepareHtmlForAI` before being sent.
 * @param scrapedPrice - Price found by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param apiKey - Anthropic API key.
 * @returns The verification verdict parsed from the model's text reply.
 * @throws Error when the reply has no leading text block; SDK errors propagate to the caller.
 */
async function verifyWithAnthropic(
  html: string,
  scrapedPrice: number,
  currency: string,
  apiKey: string
): Promise<AIVerificationResult> {
  const anthropic = new Anthropic({ apiKey });
  const preparedHtml = prepareHtmlForAI(html);

  // Replacer functions insert the values literally. A plain replacement string
  // would have sequences such as `$&` or `$$` expanded by String.prototype.replace.
  const prompt =
    VERIFICATION_PROMPT
      .replace('$SCRAPED_PRICE$', () => scrapedPrice.toString())
      .replace('$CURRENCY$', () => currency) + preparedHtml;

  const response = await anthropic.messages.create({
    model: 'claude-3-haiku-20240307',
    max_tokens: 512,
    messages: [{ role: 'user', content: prompt }],
  });

  const content = response.content[0];
  // `content` is undefined when the content array is empty; report that with the
  // same explicit error instead of a TypeError on `content.type`.
  if (!content || content.type !== 'text') {
    throw new Error('Unexpected response type from Anthropic');
  }

  return parseVerificationResponse(content.text, scrapedPrice, currency);
}
/**
 * Asks OpenAI to verify a scraped price against the page HTML.
 *
 * @param html - Page HTML; passed through `prepareHtmlForAI` before being sent.
 * @param scrapedPrice - Price found by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param apiKey - OpenAI API key.
 * @returns The verification verdict parsed from the model's reply.
 * @throws Error when the first choice carries no message content; SDK errors propagate to the caller.
 */
async function verifyWithOpenAI(
  html: string,
  scrapedPrice: number,
  currency: string,
  apiKey: string
): Promise<AIVerificationResult> {
  const openai = new OpenAI({ apiKey });
  const preparedHtml = prepareHtmlForAI(html);

  // Replacer functions insert the values literally. A plain replacement string
  // would have sequences such as `$&` or `$$` expanded by String.prototype.replace.
  const prompt =
    VERIFICATION_PROMPT
      .replace('$SCRAPED_PRICE$', () => scrapedPrice.toString())
      .replace('$CURRENCY$', () => currency) + preparedHtml;

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    max_tokens: 512,
    messages: [{ role: 'user', content: prompt }],
  });

  const content = response.choices[0]?.message?.content;
  if (!content) {
    throw new Error('No response from OpenAI');
  }

  return parseVerificationResponse(content, scrapedPrice, currency);
}
/**
 * Asks an Ollama server to verify a scraped price against the page HTML.
 *
 * @param html - Page HTML; passed through `prepareHtmlForAI` before being sent.
 * @param scrapedPrice - Price found by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param baseUrl - Base URL of the Ollama server; trailing slashes are tolerated.
 * @param model - Name of the Ollama model to query.
 * @returns The verification verdict parsed from the model's reply.
 * @throws Error when the reply carries no message content; HTTP errors propagate to the caller.
 */
async function verifyWithOllama(
  html: string,
  scrapedPrice: number,
  currency: string,
  baseUrl: string,
  model: string
): Promise<AIVerificationResult> {
  const preparedHtml = prepareHtmlForAI(html);

  // Replacer functions insert the values literally. A plain replacement string
  // would have sequences such as `$&` or `$$` expanded by String.prototype.replace.
  const prompt =
    VERIFICATION_PROMPT
      .replace('$SCRAPED_PRICE$', () => scrapedPrice.toString())
      .replace('$CURRENCY$', () => currency) + preparedHtml;

  // Strip trailing slashes so a base URL such as "http://host:11434/" does not
  // produce a request path of "//api/chat".
  const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;

  const response = await axios.post(
    endpoint,
    {
      model: model,
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    },
    {
      headers: { 'Content-Type': 'application/json' },
      timeout: 120000,
    }
  );

  const content = response.data?.message?.content;
  if (!content) {
    throw new Error('No response from Ollama');
  }

  return parseVerificationResponse(content, scrapedPrice, currency);
}
/**
 * Parses a model's verification reply into an {@link AIVerificationResult}.
 *
 * The reply may be bare JSON, JSON wrapped in a markdown code fence, or JSON
 * surrounded by extra text. Every field is validated because the model's output
 * is untrusted: anything malformed falls back to a safe default rather than
 * leaking a wrongly-typed value to the caller.
 *
 * @param responseText - Raw text returned by the AI provider.
 * @param originalPrice - The scraped price that was verified (currently unused).
 * @param originalCurrency - Currency used when the model suggests a price without a currency.
 * @returns The parsed verdict. When the reply cannot be parsed as a JSON object the
 *          scraped price is assumed correct (`isCorrect: true`, `confidence: 0.5`).
 */
function parseVerificationResponse(
  responseText: string,
  originalPrice: number,
  originalCurrency: string
): AIVerificationResult {
  console.log(`[AI Verify] Raw response: ${responseText.substring(0, 500)}...`);

  // Default result if parsing fails
  const defaultResult: AIVerificationResult = {
    isCorrect: true, // Assume correct if we can't parse
    confidence: 0.5,
    suggestedPrice: null,
    reason: 'Could not parse AI response',
  };

  let jsonStr = responseText.trim();

  // Handle markdown code blocks
  const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (jsonMatch) {
    jsonStr = jsonMatch[1].trim();
  }

  // Try to find JSON object
  const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
  if (objectMatch) {
    jsonStr = objectMatch[0];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonStr);
  } catch {
    console.error('[AI Verify] Failed to parse response:', responseText);
    return defaultResult;
  }

  // Valid JSON that is not an object (null, number, array, ...) carries no verdict.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    console.error('[AI Verify] Failed to parse response:', responseText);
    return defaultResult;
  }

  const data = parsed as Record<string, unknown>;
  console.log(`[AI Verify] Parsed:`, JSON.stringify(data, null, 2));

  // Accept a real boolean, or the strings "true"/"false" some models emit.
  // Anything else keeps the safe default of "scraped price is correct".
  let isCorrect = true;
  if (typeof data.isCorrect === 'boolean') {
    isCorrect = data.isCorrect;
  } else if (typeof data.isCorrect === 'string') {
    isCorrect = data.isCorrect.trim().toLowerCase() !== 'false';
  }

  // Confidence must be a finite number; clamp it into the documented 0..1 range.
  const rawConfidence =
    typeof data.confidence === 'string' ? parseFloat(data.confidence) : data.confidence;
  const confidence =
    typeof rawConfidence === 'number' && Number.isFinite(rawConfidence)
      ? Math.min(1, Math.max(0, rawConfidence))
      : 0.5;

  // Only build a suggestion when the model explicitly rejected the scraped price,
  // so the result never says "correct" while also carrying a replacement.
  let suggestedPrice: ParsedPrice | null = null;
  if (!isCorrect) {
    const rawPrice = data.suggestedPrice;
    const priceNum =
      typeof rawPrice === 'string'
        ? parseFloat(rawPrice.replace(/[^0-9.]/g, ''))
        : rawPrice;

    if (typeof priceNum === 'number' && Number.isFinite(priceNum) && priceNum > 0) {
      const rawCurrency = data.suggestedCurrency;
      suggestedPrice = {
        price: priceNum,
        currency:
          typeof rawCurrency === 'string' && rawCurrency.trim() !== ''
            ? rawCurrency
            : originalCurrency,
      };
    }
  }

  return {
    isCorrect,
    confidence,
    suggestedPrice,
    reason:
      typeof data.reason === 'string' && data.reason !== ''
        ? data.reason
        : 'No reason provided',
  };
}
function parseAIResponse(responseText: string): AIExtractionResult {
console.log(`[AI] Raw response: ${responseText.substring(0, 500)}...`);
@ -307,3 +487,40 @@ export async function tryAIExtraction(
return null;
}
}
// Export for use in scraper to verify scraped prices
/**
 * Verifies a scraped price with the user's configured AI provider.
 *
 * Verification is governed by the user's `ai_verification_enabled` setting,
 * which is separate from the AI extraction fallback.
 *
 * @param url - Product URL, used only in log messages.
 * @param html - Page HTML handed to the provider-specific verifier.
 * @param scrapedPrice - Price found by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param userId - User whose AI settings are loaded.
 * @returns The verification verdict, or null when verification is disabled,
 *          no usable provider is configured, or any error occurs.
 */
export async function tryAIVerification(
  url: string,
  html: string,
  scrapedPrice: number,
  currency: string,
  userId: number
): Promise<AIVerificationResult | null> {
  try {
    const models = await import('../models');
    const aiSettings = await models.userQueries.getAISettings(userId);

    // Verification has its own toggle, independent of the extraction fallback.
    if (!aiSettings?.ai_verification_enabled) {
      return null;
    }

    const {
      ai_provider: provider,
      anthropic_api_key: anthropicKey,
      openai_api_key: openaiKey,
      ollama_base_url: ollamaUrl,
      ollama_model: ollamaModel,
    } = aiSettings;

    // Dispatch to the selected provider, but only if its credentials are present.
    switch (provider) {
      case 'anthropic':
        if (anthropicKey) {
          console.log(`[AI Verify] Using Anthropic to verify $${scrapedPrice} for ${url}`);
          return await verifyWithAnthropic(html, scrapedPrice, currency, anthropicKey);
        }
        break;
      case 'openai':
        if (openaiKey) {
          console.log(`[AI Verify] Using OpenAI to verify $${scrapedPrice} for ${url}`);
          return await verifyWithOpenAI(html, scrapedPrice, currency, openaiKey);
        }
        break;
      case 'ollama':
        if (ollamaUrl && ollamaModel) {
          console.log(`[AI Verify] Using Ollama to verify $${scrapedPrice} for ${url}`);
          return await verifyWithOllama(html, scrapedPrice, currency, ollamaUrl, ollamaModel);
        }
        break;
    }

    console.log(`[AI Verify] Verification enabled but no provider configured`);
    return null;
  } catch (error) {
    // Best effort: a failed verification must never break the scrape itself.
    console.error(`[AI Verify] Verification failed for ${url}:`, error);
    return null;
  }
}

View file

@ -818,7 +818,34 @@ export async function scrapeProduct(url: string, userId?: number): Promise<Scrap
result.imageUrl = $('meta[property="og:image"]').attr('content') || null;
}
// If we still don't have a price and userId is provided, try AI extraction
// If we have a price and userId is provided, try AI verification
if (result.price && userId && html) {
try {
const { tryAIVerification } = await import('./ai-extractor');
const verifyResult = await tryAIVerification(
url,
html,
result.price.price,
result.price.currency,
userId
);
if (verifyResult) {
if (verifyResult.isCorrect) {
console.log(`[AI Verify] Confirmed price $${result.price.price} is correct (confidence: ${verifyResult.confidence})`);
} else if (verifyResult.suggestedPrice && verifyResult.confidence > 0.6) {
console.log(`[AI Verify] Price correction: $${result.price.price} -> $${verifyResult.suggestedPrice.price} (${verifyResult.reason})`);
result.price = verifyResult.suggestedPrice;
} else {
console.log(`[AI Verify] Price might be incorrect but no confident suggestion: ${verifyResult.reason}`);
}
}
} catch (verifyError) {
console.error(`[AI Verify] Verification failed for ${url}:`, verifyError);
}
}
// If we still don't have a price and userId is provided, try AI extraction as fallback
if (!result.price && userId && html) {
try {
const { tryAIExtraction } = await import('./ai-extractor');