mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-04-25 00:36:32 +02:00
Add AI price verification feature
When enabled, AI verifies every scraped price to ensure accuracy. This catches issues such as a scraped "savings" amount being recorded instead of the actual price.

- Add ai_verification_enabled column to users table
- Create verification prompt and functions for Anthropic, OpenAI, Ollama
- Integrate verification step into scraper after traditional scraping
- Add verification toggle to Settings page (separate from AI extraction)
- AI verification is independent of AI extraction fallback

Flow: Traditional scraping -> AI verification (if enabled) -> AI extraction fallback (if no price found)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
7afa3ccec3
commit
dc4e7b9665
6 changed files with 278 additions and 4 deletions
|
|
@ -58,6 +58,9 @@ async function runMigrations() {
|
|||
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ntfy_enabled') THEN
|
||||
ALTER TABLE users ADD COLUMN ntfy_enabled BOOLEAN DEFAULT true;
|
||||
END IF;
|
||||
IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ai_verification_enabled') THEN
|
||||
ALTER TABLE users ADD COLUMN ai_verification_enabled BOOLEAN DEFAULT false;
|
||||
END IF;
|
||||
END $$;
|
||||
`);
|
||||
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ export interface NotificationSettings {
|
|||
|
||||
export interface AISettings {
|
||||
ai_enabled: boolean;
|
||||
ai_verification_enabled: boolean;
|
||||
ai_provider: 'anthropic' | 'openai' | 'ollama' | null;
|
||||
anthropic_api_key: string | null;
|
||||
openai_api_key: string | null;
|
||||
|
|
@ -209,7 +210,9 @@ export const userQueries = {
|
|||
|
||||
getAISettings: async (id: number): Promise<AISettings | null> => {
|
||||
const result = await pool.query(
|
||||
'SELECT ai_enabled, ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model FROM users WHERE id = $1',
|
||||
`SELECT ai_enabled, COALESCE(ai_verification_enabled, false) as ai_verification_enabled,
|
||||
ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model
|
||||
FROM users WHERE id = $1`,
|
||||
[id]
|
||||
);
|
||||
return result.rows[0] || null;
|
||||
|
|
@ -227,6 +230,10 @@ export const userQueries = {
|
|||
fields.push(`ai_enabled = $${paramIndex++}`);
|
||||
values.push(settings.ai_enabled);
|
||||
}
|
||||
if (settings.ai_verification_enabled !== undefined) {
|
||||
fields.push(`ai_verification_enabled = $${paramIndex++}`);
|
||||
values.push(settings.ai_verification_enabled);
|
||||
}
|
||||
if (settings.ai_provider !== undefined) {
|
||||
fields.push(`ai_provider = $${paramIndex++}`);
|
||||
values.push(settings.ai_provider);
|
||||
|
|
@ -253,7 +260,8 @@ export const userQueries = {
|
|||
values.push(id.toString());
|
||||
const result = await pool.query(
|
||||
`UPDATE users SET ${fields.join(', ')} WHERE id = $${paramIndex}
|
||||
RETURNING ai_enabled, ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model`,
|
||||
RETURNING ai_enabled, COALESCE(ai_verification_enabled, false) as ai_verification_enabled,
|
||||
ai_provider, anthropic_api_key, openai_api_key, ollama_base_url, ollama_model`,
|
||||
values
|
||||
);
|
||||
return result.rows[0] || null;
|
||||
|
|
|
|||
|
|
@ -14,6 +14,39 @@ export interface AIExtractionResult {
|
|||
confidence: number;
|
||||
}
|
||||
|
||||
/** Verdict portion of a verification: the yes/no answer and its justification. */
interface AIVerificationVerdict {
  /** True when the AI judged the scraped price to be the correct one. */
  isCorrect: boolean;
  /** Confidence the AI reported for its verdict (requested from the model as a number from 0 to 1). */
  confidence: number;
  /** Short explanation of the verdict. */
  reason: string;
}

/**
 * Outcome of asking an AI provider to double-check a price that the
 * traditional scraper extracted from a product page.
 */
export interface AIVerificationResult extends AIVerificationVerdict {
  /** Replacement price proposed by the AI, or null when it proposed none. */
  suggestedPrice: ParsedPrice | null;
}
|
||||
|
||||
/**
 * Prompt template sent to the AI provider when verifying a scraped price.
 *
 * Contains two placeholders that callers substitute before sending:
 * `$SCRAPED_PRICE$` and `$CURRENCY$`. The template ends with an
 * "HTML Content:" line followed by a newline, so the prepared page HTML is
 * appended directly after it.
 */
const VERIFICATION_PROMPT = [
  'You are a price verification assistant. I scraped a product page and found a price. Please verify if this price is correct.',
  '',
  'Scraped Price: $SCRAPED_PRICE$ $CURRENCY$',
  '',
  'Analyze the HTML content below and determine:',
  '1. Is the scraped price the correct CURRENT/SALE price for the main product?',
  '2. If not, what is the correct price?',
  '',
  'Common issues to watch for:',
  '- Scraped price might be a "savings" amount (e.g., "Save $189.99")',
  '- Scraped price might be from a bundle/combo deal section',
  '- Scraped price might be shipping cost or add-on price',
  '- Scraped price might be the original/crossed-out price instead of the sale price',
  '',
  'Return a JSON object with:',
  '- isCorrect: boolean - true if the scraped price is correct',
  '- confidence: number from 0 to 1',
  '- suggestedPrice: the correct price as a number (or null if scraped price is correct)',
  '- suggestedCurrency: currency code if suggesting a different price',
  '- reason: brief explanation of your decision',
  '',
  'Only return valid JSON, no explanation text outside the JSON.',
  '',
  'HTML Content:',
  '', // trailing element yields the final newline after "HTML Content:"
].join('\n');
|
||||
|
||||
const EXTRACTION_PROMPT = `You are a price extraction assistant. Analyze the following HTML content from a product page and extract the product information.
|
||||
|
||||
Return a JSON object with these fields:
|
||||
|
|
@ -180,6 +213,153 @@ async function extractWithOllama(
|
|||
return parseAIResponse(content);
|
||||
}
|
||||
|
||||
// Verification functions for each provider
|
||||
/**
 * Asks an Anthropic model whether a scraped price matches the page content.
 *
 * @param html - Page HTML; it is passed through `prepareHtmlForAI` before being sent.
 * @param scrapedPrice - Price produced by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param apiKey - Anthropic API key used to construct the client.
 * @returns The verdict produced by `parseVerificationResponse` from the model's text reply.
 * @throws Error when the reply has no content block or its first block is not text.
 */
async function verifyWithAnthropic(
  html: string,
  scrapedPrice: number,
  currency: string,
  apiKey: string
): Promise<AIVerificationResult> {
  const anthropic = new Anthropic({ apiKey });

  const preparedHtml = prepareHtmlForAI(html);
  // Replacer functions insert the values literally; a plain replacement
  // string would expand `$`-patterns such as `$&` or `$$` inside the value.
  const prompt =
    VERIFICATION_PROMPT
      .replace('$SCRAPED_PRICE$', () => scrapedPrice.toString())
      .replace('$CURRENCY$', () => currency) + preparedHtml;

  const response = await anthropic.messages.create({
    model: 'claude-3-haiku-20240307',
    max_tokens: 512,
    messages: [{ role: 'user', content: prompt }],
  });

  // An empty content array would otherwise surface as an opaque TypeError
  // on `.type`; report it with the same explicit error as a non-text block.
  const content = response.content[0];
  if (!content || content.type !== 'text') {
    throw new Error('Unexpected response type from Anthropic');
  }

  return parseVerificationResponse(content.text, scrapedPrice, currency);
}
|
||||
|
||||
/**
 * Asks an OpenAI chat model whether a scraped price matches the page content.
 *
 * @param html - Page HTML; it is passed through `prepareHtmlForAI` before being sent.
 * @param scrapedPrice - Price produced by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param apiKey - OpenAI API key used to construct the client.
 * @returns The verdict produced by `parseVerificationResponse` from the model's reply.
 * @throws Error when the first choice carries no message content.
 */
async function verifyWithOpenAI(
  html: string,
  scrapedPrice: number,
  currency: string,
  apiKey: string
): Promise<AIVerificationResult> {
  const openai = new OpenAI({ apiKey });

  const preparedHtml = prepareHtmlForAI(html);
  // Replacer functions insert the values literally; a plain replacement
  // string would expand `$`-patterns such as `$&` or `$$` inside the value.
  const prompt =
    VERIFICATION_PROMPT
      .replace('$SCRAPED_PRICE$', () => scrapedPrice.toString())
      .replace('$CURRENCY$', () => currency) + preparedHtml;

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    max_tokens: 512,
    messages: [{ role: 'user', content: prompt }],
  });

  const content = response.choices[0]?.message?.content;
  if (!content) {
    throw new Error('No response from OpenAI');
  }

  return parseVerificationResponse(content, scrapedPrice, currency);
}
|
||||
|
||||
/**
 * Asks an Ollama-hosted model whether a scraped price matches the page content.
 *
 * @param html - Page HTML; it is passed through `prepareHtmlForAI` before being sent.
 * @param scrapedPrice - Price produced by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param baseUrl - Base URL of the Ollama server; trailing slashes are ignored.
 * @param model - Name of the Ollama model to query.
 * @returns The verdict produced by `parseVerificationResponse` from the model's reply.
 * @throws Error when the response body carries no message content.
 */
async function verifyWithOllama(
  html: string,
  scrapedPrice: number,
  currency: string,
  baseUrl: string,
  model: string
): Promise<AIVerificationResult> {
  const preparedHtml = prepareHtmlForAI(html);
  // Replacer functions insert the values literally; a plain replacement
  // string would expand `$`-patterns such as `$&` or `$$` inside the value.
  const prompt =
    VERIFICATION_PROMPT
      .replace('$SCRAPED_PRICE$', () => scrapedPrice.toString())
      .replace('$CURRENCY$', () => currency) + preparedHtml;

  // A user-entered base URL such as "http://host:11434/" would otherwise
  // produce "//api/chat", which the server may not route.
  const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;

  const response = await axios.post(
    endpoint,
    {
      model: model,
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    },
    {
      headers: { 'Content-Type': 'application/json' },
      timeout: 120000, // milliseconds
    }
  );

  const content = response.data?.message?.content;
  if (!content) {
    throw new Error('No response from Ollama');
  }

  return parseVerificationResponse(content, scrapedPrice, currency);
}
|
||||
|
||||
/**
 * Converts an AI provider's raw text reply into a validated verification result.
 *
 * The reply may be bare JSON, JSON wrapped in a markdown code fence, or JSON
 * surrounded by extra text; the first `{...}` span found is parsed. Every
 * field is type-checked before use because the model's output is untrusted:
 * - `isCorrect`: a boolean, or the strings "true"/"false"; anything else counts as true.
 * - `confidence`: a number or numeric string, clamped to [0, 1]; otherwise 0.5.
 * - `suggestedPrice`: only honoured when `isCorrect` is false and the value is a
 *   finite positive number (or a string containing one).
 * - `suggestedCurrency`: a non-blank string; otherwise `originalCurrency` is used.
 *
 * @param responseText - Raw text returned by the model.
 * @param originalPrice - Scraped price that was verified; accepted for call-site symmetry and not consulted here.
 * @param originalCurrency - Currency used for a suggested price when the model names none.
 * @returns The parsed verdict, or an "assume correct" result with confidence 0.5 when
 *   the reply is not a JSON object.
 */
function parseVerificationResponse(
  responseText: string,
  originalPrice: number,
  originalCurrency: string
): AIVerificationResult {
  console.log(`[AI Verify] Raw response: ${responseText.substring(0, 500)}...`);

  // Returned whenever the reply cannot be interpreted; assuming the scraped
  // price is correct keeps an unreadable reply from changing any price.
  const defaultResult: AIVerificationResult = {
    isCorrect: true,
    confidence: 0.5,
    suggestedPrice: null,
    reason: 'Could not parse AI response',
  };

  let jsonStr = responseText.trim();

  // Handle markdown code blocks
  const fencedMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fencedMatch && fencedMatch[1] !== undefined) {
    jsonStr = fencedMatch[1].trim();
  }

  // Try to find JSON object
  const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
  if (objectMatch) {
    jsonStr = objectMatch[0];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonStr);
  } catch (error) {
    console.error('[AI Verify] Failed to parse response:', responseText);
    return defaultResult;
  }

  // Valid JSON that is not an object (array, number, null, ...) carries no verdict.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    console.error('[AI Verify] Failed to parse response:', responseText);
    return defaultResult;
  }

  const data = parsed as Record<string, unknown>;
  console.log(`[AI Verify] Parsed:`, JSON.stringify(data, null, 2));

  // Models sometimes emit the verdict as a string; "false" must not be
  // treated as truthy, or an incorrect price would be reported as confirmed.
  const rawVerdict = data.isCorrect;
  let isCorrect = true;
  if (typeof rawVerdict === 'boolean') {
    isCorrect = rawVerdict;
  } else if (typeof rawVerdict === 'string') {
    isCorrect = rawVerdict.trim().toLowerCase() !== 'false';
  }

  const rawConfidence =
    typeof data.confidence === 'string' ? Number.parseFloat(data.confidence) : data.confidence;
  const confidence =
    typeof rawConfidence === 'number' && Number.isFinite(rawConfidence)
      ? Math.min(1, Math.max(0, rawConfidence))
      : 0.5;

  let suggestedPrice: ParsedPrice | null = null;
  if (!isCorrect) {
    const rawPrice = data.suggestedPrice;
    const priceNum =
      typeof rawPrice === 'string'
        ? Number.parseFloat(rawPrice.replace(/[^0-9.]/g, ''))
        : rawPrice;

    // The typeof check rejects booleans and objects that would slip past a
    // bare isNaN()/"> 0" test through implicit coercion.
    if (typeof priceNum === 'number' && Number.isFinite(priceNum) && priceNum > 0) {
      const rawCurrency = data.suggestedCurrency;
      const currency =
        typeof rawCurrency === 'string' && rawCurrency.trim() !== ''
          ? rawCurrency.trim()
          : originalCurrency;
      suggestedPrice = { price: priceNum, currency };
    }
  }

  const reason =
    typeof data.reason === 'string' && data.reason !== '' ? data.reason : 'No reason provided';

  return { isCorrect, confidence, suggestedPrice, reason };
}
|
||||
|
||||
function parseAIResponse(responseText: string): AIExtractionResult {
|
||||
console.log(`[AI] Raw response: ${responseText.substring(0, 500)}...`);
|
||||
|
||||
|
|
@ -307,3 +487,40 @@ export async function tryAIExtraction(
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Export for use in scraper to verify scraped prices
|
||||
/**
 * Verifies a scraped price with the user's configured AI provider.
 *
 * Loads the user's AI settings and, when `ai_verification_enabled` is set and
 * the selected provider has its credentials/connection details, delegates to
 * the matching provider function. Failures are logged and reported as null
 * rather than thrown.
 *
 * @param url - Product URL; used only in log messages.
 * @param html - Page HTML handed to the provider function.
 * @param scrapedPrice - Price produced by the traditional scraper.
 * @param currency - Currency associated with the scraped price.
 * @param userId - User whose AI settings are looked up.
 * @returns The provider's verdict, or null when verification is disabled,
 *   no usable provider is configured, or an error occurred.
 */
export async function tryAIVerification(
  url: string,
  html: string,
  scrapedPrice: number,
  currency: string,
  userId: number
): Promise<AIVerificationResult | null> {
  try {
    const models = await import('../models');
    const settings = await models.userQueries.getAISettings(userId);

    // Verification has its own switch, separate from the AI extraction fallback.
    if (!settings?.ai_verification_enabled) {
      return null;
    }

    // A provider is only usable when its key / connection details are present;
    // otherwise control falls through to the "no provider configured" path.
    switch (settings.ai_provider) {
      case 'anthropic':
        if (settings.anthropic_api_key) {
          console.log(`[AI Verify] Using Anthropic to verify $${scrapedPrice} for ${url}`);
          return await verifyWithAnthropic(html, scrapedPrice, currency, settings.anthropic_api_key);
        }
        break;
      case 'openai':
        if (settings.openai_api_key) {
          console.log(`[AI Verify] Using OpenAI to verify $${scrapedPrice} for ${url}`);
          return await verifyWithOpenAI(html, scrapedPrice, currency, settings.openai_api_key);
        }
        break;
      case 'ollama':
        if (settings.ollama_base_url && settings.ollama_model) {
          console.log(`[AI Verify] Using Ollama to verify $${scrapedPrice} for ${url}`);
          return await verifyWithOllama(html, scrapedPrice, currency, settings.ollama_base_url, settings.ollama_model);
        }
        break;
      default:
        break;
    }

    console.log(`[AI Verify] Verification enabled but no provider configured`);
    return null;
  } catch (error) {
    console.error(`[AI Verify] Verification failed for ${url}:`, error);
    return null;
  }
}
|
||||
|
|
|
|||
|
|
@ -818,7 +818,34 @@ export async function scrapeProduct(url: string, userId?: number): Promise<Scrap
|
|||
result.imageUrl = $('meta[property="og:image"]').attr('content') || null;
|
||||
}
|
||||
|
||||
// If we still don't have a price and userId is provided, try AI extraction
|
||||
// If we have a price and userId is provided, try AI verification
|
||||
if (result.price && userId && html) {
|
||||
try {
|
||||
const { tryAIVerification } = await import('./ai-extractor');
|
||||
const verifyResult = await tryAIVerification(
|
||||
url,
|
||||
html,
|
||||
result.price.price,
|
||||
result.price.currency,
|
||||
userId
|
||||
);
|
||||
|
||||
if (verifyResult) {
|
||||
if (verifyResult.isCorrect) {
|
||||
console.log(`[AI Verify] Confirmed price $${result.price.price} is correct (confidence: ${verifyResult.confidence})`);
|
||||
} else if (verifyResult.suggestedPrice && verifyResult.confidence > 0.6) {
|
||||
console.log(`[AI Verify] Price correction: $${result.price.price} -> $${verifyResult.suggestedPrice.price} (${verifyResult.reason})`);
|
||||
result.price = verifyResult.suggestedPrice;
|
||||
} else {
|
||||
console.log(`[AI Verify] Price might be incorrect but no confident suggestion: ${verifyResult.reason}`);
|
||||
}
|
||||
}
|
||||
} catch (verifyError) {
|
||||
console.error(`[AI Verify] Verification failed for ${url}:`, verifyError);
|
||||
}
|
||||
}
|
||||
|
||||
// If we still don't have a price and userId is provided, try AI extraction as fallback
|
||||
if (!result.price && userId && html) {
|
||||
try {
|
||||
const { tryAIExtraction } = await import('./ai-extractor');
|
||||
|
|
|
|||
|
|
@ -196,6 +196,7 @@ export const settingsApi = {
|
|||
|
||||
updateAI: (data: {
|
||||
ai_enabled?: boolean;
|
||||
ai_verification_enabled?: boolean;
|
||||
ai_provider?: 'anthropic' | 'openai' | 'ollama' | null;
|
||||
anthropic_api_key?: string | null;
|
||||
openai_api_key?: string | null;
|
||||
|
|
@ -213,6 +214,7 @@ export const settingsApi = {
|
|||
// AI Settings types
|
||||
export interface AISettings {
|
||||
ai_enabled: boolean;
|
||||
ai_verification_enabled: boolean;
|
||||
ai_provider: 'anthropic' | 'openai' | 'ollama' | null;
|
||||
anthropic_api_key: string | null;
|
||||
openai_api_key: string | null;
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ export default function Settings() {
|
|||
// AI state
|
||||
const [aiSettings, setAISettings] = useState<AISettings | null>(null);
|
||||
const [aiEnabled, setAIEnabled] = useState(false);
|
||||
const [aiVerificationEnabled, setAIVerificationEnabled] = useState(false);
|
||||
const [aiProvider, setAIProvider] = useState<'anthropic' | 'openai' | 'ollama'>('anthropic');
|
||||
const [anthropicApiKey, setAnthropicApiKey] = useState('');
|
||||
const [openaiApiKey, setOpenaiApiKey] = useState('');
|
||||
|
|
@ -97,6 +98,7 @@ export default function Settings() {
|
|||
// Populate AI fields with actual values
|
||||
setAISettings(aiRes.data);
|
||||
setAIEnabled(aiRes.data.ai_enabled);
|
||||
setAIVerificationEnabled(aiRes.data.ai_verification_enabled ?? false);
|
||||
if (aiRes.data.ai_provider) {
|
||||
setAIProvider(aiRes.data.ai_provider);
|
||||
}
|
||||
|
|
@ -353,6 +355,7 @@ export default function Settings() {
|
|||
try {
|
||||
const response = await settingsApi.updateAI({
|
||||
ai_enabled: aiEnabled,
|
||||
ai_verification_enabled: aiVerificationEnabled,
|
||||
ai_provider: aiProvider,
|
||||
anthropic_api_key: anthropicApiKey || undefined,
|
||||
openai_api_key: openaiApiKey || undefined,
|
||||
|
|
@ -360,6 +363,7 @@ export default function Settings() {
|
|||
ollama_model: aiProvider === 'ollama' ? ollamaModel || null : undefined,
|
||||
});
|
||||
setAISettings(response.data);
|
||||
setAIVerificationEnabled(response.data.ai_verification_enabled ?? false);
|
||||
setAnthropicApiKey('');
|
||||
setOpenaiApiKey('');
|
||||
setSuccess('AI settings saved successfully');
|
||||
|
|
@ -1287,7 +1291,20 @@ export default function Settings() {
|
|||
/>
|
||||
</div>
|
||||
|
||||
{aiEnabled && (
|
||||
<div className="settings-toggle">
|
||||
<div className="settings-toggle-label">
|
||||
<span className="settings-toggle-title">Enable AI Verification</span>
|
||||
<span className="settings-toggle-description">
|
||||
Verify all scraped prices with AI to ensure accuracy
|
||||
</span>
|
||||
</div>
|
||||
<button
|
||||
className={`toggle-switch ${aiVerificationEnabled ? 'active' : ''}`}
|
||||
onClick={() => setAIVerificationEnabled(!aiVerificationEnabled)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{(aiEnabled || aiVerificationEnabled) && (
|
||||
<>
|
||||
<div className="settings-form-group">
|
||||
<label>AI Provider</label>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue