diff --git a/backend/package-lock.json b/backend/package-lock.json index a95cc13..418a4a0 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -8,6 +8,7 @@ "name": "priceghost-backend", "version": "1.0.0", "dependencies": { + "@anthropic-ai/sdk": "^0.24.0", "axios": "^1.6.0", "bcrypt": "^5.1.1", "cheerio": "^1.0.0-rc.12", @@ -16,6 +17,7 @@ "express": "^4.18.2", "jsonwebtoken": "^9.0.2", "node-cron": "^3.0.3", + "openai": "^4.47.0", "pg": "^8.11.3", "puppeteer": "^22.0.0", "puppeteer-extra": "^3.3.6", @@ -33,6 +35,37 @@ "typescript": "^5.3.2" } }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.24.3", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.24.3.tgz", + "integrity": "sha512-916wJXO6T6k8R6BAAcLhLPv/pnLGy7YSEBZXZ1XTFbLcTZE8oTy3oDW9WJf9KKZwMvVcePIfoTSvzXHRcGxkQQ==", + "license": "MIT", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" + } + }, + "node_modules/@anthropic-ai/sdk/node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@anthropic-ai/sdk/node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "license": "MIT" + }, "node_modules/@babel/code-frame": { "version": "7.28.6", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz", @@ -680,7 +713,6 @@ "version": "20.19.30", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.30.tgz", "integrity": "sha512-WJtwWJu7UdlvzEAUm484QNg5eAoq5QR08KDNx7g45Usrs2NtOPiX8ugDqmKdXkyL03rBqU5dYNYVQetEpBHq2g==", - "devOptional": true, "license": "MIT", "dependencies": { "undici-types": "~6.21.0" @@ -693,6 +725,16 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/node-fetch": { + "version": "2.6.13", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz", + "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", + "license": "MIT", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.4" + } + }, "node_modules/@types/pg": { "version": "8.16.0", "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.16.0.tgz", @@ -768,6 +810,18 @@ "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", "license": "ISC" }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -816,6 +870,18 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/agentkeepalive": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", + "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", + "license": "MIT", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", @@ -1867,6 +1933,15 @@ "node": ">= 0.6" } }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/events-universal": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", @@ -2055,6 +2130,34 @@ "node": ">= 6" } }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", + "license": "MIT" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "license": "MIT", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/formdata-node/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, "node_modules/forwarded": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", @@ -2489,6 +2592,15 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.0.0" + } + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", @@ -3021,6 +3133,26 @@ "node": ">=6.0.0" } }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "deprecated": "Use your platform's native DOMException instead", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "license": "MIT", + "engines": { + "node": ">=10.5.0" + } + }, "node_modules/node-fetch": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", @@ -3123,6 +3255,51 @@ "wrappy": "1" } }, + "node_modules/openai": { + "version": "4.104.0", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz", + "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", + "license": "Apache-2.0", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7" + }, + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/openai/node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "license": "MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/openai/node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "license": "MIT" + }, "node_modules/pac-proxy-agent": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", @@ -4466,7 +4643,6 @@ "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "devOptional": true, "license": "MIT" }, "node_modules/universalify": { @@ -4526,6 +4702,15 @@ "node": ">= 0.8" } }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/backend/package.json b/backend/package.json index c8a2fe8..0144198 100644 --- a/backend/package.json +++ b/backend/package.json @@ -10,6 +10,7 @@ "db:init": "tsx src/config/init-db.ts" }, "dependencies": { + "@anthropic-ai/sdk": "^0.24.0", "axios": "^1.6.0", "bcrypt": "^5.1.1", "cheerio": "^1.0.0-rc.12", @@ -18,6 +19,7 @@ "express": "^4.18.2", "jsonwebtoken": "^9.0.2", "node-cron": "^3.0.3", + "openai": "^4.47.0", "pg": "^8.11.3", "puppeteer": "^22.0.0", "puppeteer-extra": "^3.3.6", diff --git a/backend/src/index.ts b/backend/src/index.ts index 2561cd3..14e70f6 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -9,6 +9,37 @@ import settingsRoutes from './routes/settings'; import profileRoutes from './routes/profile'; import adminRoutes from './routes/admin'; import { startScheduler } from './services/scheduler'; +import pool from './config/database'; + +// Run database migrations +async function runMigrations() { + const client = await pool.connect(); + try { + // Add AI settings columns to users table if they don't exist + await client.query(` + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ai_enabled') THEN + ALTER TABLE users ADD COLUMN ai_enabled BOOLEAN DEFAULT false; + END IF; + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'ai_provider') THEN + ALTER TABLE users ADD COLUMN ai_provider VARCHAR(20); + END IF; + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'anthropic_api_key') THEN + ALTER TABLE users ADD COLUMN anthropic_api_key TEXT; + END IF; + IF NOT EXISTS (SELECT 1 FROM information_schema.columns WHERE table_name = 'users' AND column_name = 'openai_api_key') THEN + ALTER TABLE users ADD COLUMN openai_api_key TEXT; + END IF; + END $$; + `); + console.log('Database migrations completed'); + } catch (error) { + console.error('Migration error:', error); + } finally { + client.release(); + } +} // Load environment variables dotenv.config(); @@ -47,9 +78,12 @@ app.use( ); // Start server -app.listen(PORT, () => { +app.listen(PORT, async () => { console.log(`PriceGhost API server running on port ${PORT}`); + // Run database migrations + await runMigrations(); + // Start the background price checker if (process.env.NODE_ENV !== 'test') { startScheduler(); diff --git a/backend/src/models/index.ts b/backend/src/models/index.ts index 5a8e0b0..9763030 100644 --- a/backend/src/models/index.ts +++ b/backend/src/models/index.ts @@ -27,6 +27,13 @@ export interface NotificationSettings { discord_webhook_url: string | null; } +export interface AISettings { + ai_enabled: boolean; + ai_provider: 'anthropic' | 'openai' | null; + anthropic_api_key: string | null; + openai_api_key: string | null; +} + export const userQueries = { findByEmail: async (email: string): Promise => { const result = await pool.query( @@ -155,6 +162,50 @@ export const userQueries = { ); return (result.rowCount ?? 0) > 0; }, + + getAISettings: async (id: number): Promise => { + const result = await pool.query( + 'SELECT ai_enabled, ai_provider, anthropic_api_key, openai_api_key FROM users WHERE id = $1', + [id] + ); + return result.rows[0] || null; + }, + + updateAISettings: async ( + id: number, + settings: Partial + ): Promise => { + const fields: string[] = []; + const values: (string | boolean | null)[] = []; + let paramIndex = 1; + + if (settings.ai_enabled !== undefined) { + fields.push(`ai_enabled = $${paramIndex++}`); + values.push(settings.ai_enabled); + } + if (settings.ai_provider !== undefined) { + fields.push(`ai_provider = $${paramIndex++}`); + values.push(settings.ai_provider); + } + if (settings.anthropic_api_key !== undefined) { + fields.push(`anthropic_api_key = $${paramIndex++}`); + values.push(settings.anthropic_api_key); + } + if (settings.openai_api_key !== undefined) { + fields.push(`openai_api_key = $${paramIndex++}`); + values.push(settings.openai_api_key); + } + + if (fields.length === 0) return null; + + values.push(id.toString()); + const result = await pool.query( + `UPDATE users SET ${fields.join(', ')} WHERE id = $${paramIndex} + RETURNING ai_enabled, ai_provider, anthropic_api_key, openai_api_key`, + values + ); + return result.rows[0] || null; + }, }; // System settings queries diff --git a/backend/src/routes/products.ts b/backend/src/routes/products.ts index 166e608..ae20862 100644 --- a/backend/src/routes/products.ts +++ b/backend/src/routes/products.ts @@ -39,8 +39,8 @@ router.post('/', async (req: AuthRequest, res: Response) => { return; } - // Scrape product info - const scrapedData = await scrapeProduct(url); + // Scrape product info (pass userId for AI fallback) + const scrapedData = await scrapeProduct(url, userId); // Allow adding out-of-stock products, but require a price for in-stock ones if (!scrapedData.price && scrapedData.stockStatus !== 'out_of_stock') { diff --git a/backend/src/routes/settings.ts b/backend/src/routes/settings.ts index 9ed46f7..a339edd 100644 --- a/backend/src/routes/settings.ts +++ b/backend/src/routes/settings.ts @@ -127,4 +127,89 @@ router.post('/notifications/test/discord', async (req: AuthRequest, res: Respons } }); +// Get AI settings +router.get('/ai', async (req: AuthRequest, res: Response) => { + try { + const userId = req.userId!; + const settings = await userQueries.getAISettings(userId); + + if (!settings) { + res.status(404).json({ error: 'User not found' }); + return; + } + + // Don't expose full API keys, just indicate if they're set + res.json({ + ai_enabled: settings.ai_enabled || false, + ai_provider: settings.ai_provider || null, + anthropic_configured: !!settings.anthropic_api_key, + openai_configured: !!settings.openai_api_key, + }); + } catch (error) { + console.error('Error fetching AI settings:', error); + res.status(500).json({ error: 'Failed to fetch AI settings' }); + } +}); + +// Update AI settings +router.put('/ai', async (req: AuthRequest, res: Response) => { + try { + const userId = req.userId!; + const { ai_enabled, ai_provider, anthropic_api_key, openai_api_key } = req.body; + + const settings = await userQueries.updateAISettings(userId, { + ai_enabled, + ai_provider, + anthropic_api_key, + openai_api_key, + }); + + if (!settings) { + res.status(400).json({ error: 'No settings to update' }); + return; + } + + res.json({ + ai_enabled: settings.ai_enabled || false, + ai_provider: settings.ai_provider || null, + anthropic_configured: !!settings.anthropic_api_key, + openai_configured: !!settings.openai_api_key, + message: 'AI settings updated successfully', + }); + } catch (error) { + console.error('Error updating AI settings:', error); + res.status(500).json({ error: 'Failed to update AI settings' }); + } +}); + +// Test AI extraction +router.post('/ai/test', async (req: AuthRequest, res: Response) => { + try { + const userId = req.userId!; + const { url } = req.body; + + if (!url) { + res.status(400).json({ error: 'URL is required' }); + return; + } + + const settings = await userQueries.getAISettings(userId); + if (!settings?.ai_enabled) { + res.status(400).json({ error: 'AI extraction is not enabled' }); + return; + } + + const { extractWithAI } = await import('../services/ai-extractor'); + const result = await extractWithAI(url, settings); + + res.json({ + success: !!result.price, + ...result, + }); + } catch (error) { + console.error('Error testing AI extraction:', error); + res.status(500).json({ error: 'Failed to test AI extraction' }); + } +}); + export default router; diff --git a/backend/src/services/ai-extractor.ts b/backend/src/services/ai-extractor.ts new file mode 100644 index 0000000..27dba90 --- /dev/null +++ b/backend/src/services/ai-extractor.ts @@ -0,0 +1,258 @@ +import Anthropic from '@anthropic-ai/sdk'; +import OpenAI from 'openai'; +import axios from 'axios'; +import { load } from 'cheerio'; +import { AISettings } from '../models'; +import { ParsedPrice } from '../utils/priceParser'; +import { StockStatus } from './scraper'; + +export interface AIExtractionResult { + name: string | null; + price: ParsedPrice | null; + imageUrl: string | null; + stockStatus: StockStatus; + confidence: number; +} + +const EXTRACTION_PROMPT = `You are a price extraction assistant. Analyze the following HTML content from a product page and extract the product information. + +Return a JSON object with these fields: +- name: The product name/title (string or null) +- price: The current selling price as a number (not the original/crossed-out price) +- currency: The currency code (USD, EUR, GBP, etc.) +- imageUrl: The main product image URL (string or null) +- stockStatus: One of "in_stock", "out_of_stock", or "unknown" +- confidence: Your confidence in the extraction from 0 to 1 + +Important: +- Extract the CURRENT/SALE price, not the original price if there's a discount +- If you can't find a price with confidence, set price to null +- Only return valid JSON, no explanation text + +HTML Content: +`; + +// Truncate HTML to fit within token limits while preserving important content +function prepareHtmlForAI(html: string): string { + const $ = load(html); + + // Remove script, style, and other non-content elements + $('script, style, noscript, iframe, svg, path, meta, link, comment').remove(); + + // Get the body content + let content = $('body').html() || html; + + // Try to focus on product-related sections if possible + const productSelectors = [ + '[itemtype*="Product"]', + '[class*="product"]', + '[id*="product"]', + '[class*="pdp"]', + 'main', + '[role="main"]', + ]; + + for (const selector of productSelectors) { + const section = $(selector).first(); + if (section.length && section.html() && section.html()!.length > 500) { + content = section.html()!; + break; + } + } + + // Also extract JSON-LD data which often contains product info + const jsonLdScripts: string[] = []; + $('script[type="application/ld+json"]').each((_, el) => { + const scriptContent = $(el).html(); + if (scriptContent && scriptContent.includes('price')) { + jsonLdScripts.push(scriptContent); + } + }); + + // Combine content with JSON-LD data + let finalContent = content; + if (jsonLdScripts.length > 0) { + finalContent = `JSON-LD Data:\n${jsonLdScripts.join('\n')}\n\nHTML Content:\n${content}`; + } + + // Truncate to ~15000 characters to stay within token limits + if (finalContent.length > 15000) { + finalContent = finalContent.substring(0, 15000) + '\n... [truncated]'; + } + + return finalContent; +} + +async function extractWithAnthropic( + html: string, + apiKey: string +): Promise { + const anthropic = new Anthropic({ apiKey }); + + const preparedHtml = prepareHtmlForAI(html); + + const response = await anthropic.messages.create({ + model: 'claude-3-haiku-20240307', + max_tokens: 1024, + messages: [ + { + role: 'user', + content: EXTRACTION_PROMPT + preparedHtml, + }, + ], + }); + + const content = response.content[0]; + if (content.type !== 'text') { + throw new Error('Unexpected response type from Anthropic'); + } + + return parseAIResponse(content.text); +} + +async function extractWithOpenAI( + html: string, + apiKey: string +): Promise { + const openai = new OpenAI({ apiKey }); + + const preparedHtml = prepareHtmlForAI(html); + + const response = await openai.chat.completions.create({ + model: 'gpt-4o-mini', + max_tokens: 1024, + messages: [ + { + role: 'user', + content: EXTRACTION_PROMPT + preparedHtml, + }, + ], + }); + + const content = response.choices[0]?.message?.content; + if (!content) { + throw new Error('No response from OpenAI'); + } + + return parseAIResponse(content); +} + +function parseAIResponse(responseText: string): AIExtractionResult { + // Try to extract JSON from the response + let jsonStr = responseText.trim(); + + // Handle markdown code blocks + const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/); + if (jsonMatch) { + jsonStr = jsonMatch[1].trim(); + } + + // Try to find JSON object in the response + const objectMatch = jsonStr.match(/\{[\s\S]*\}/); + if (objectMatch) { + jsonStr = objectMatch[0]; + } + + try { + const data = JSON.parse(jsonStr); + + let price: ParsedPrice | null = null; + if (data.price !== null && data.price !== undefined) { + const priceNum = typeof data.price === 'string' + ? parseFloat(data.price.replace(/[^0-9.]/g, '')) + : data.price; + + if (!isNaN(priceNum) && priceNum > 0) { + price = { + price: priceNum, + currency: data.currency || 'USD', + }; + } + } + + let stockStatus: StockStatus = 'unknown'; + if (data.stockStatus) { + const status = data.stockStatus.toLowerCase().replace(/[^a-z_]/g, ''); + if (status === 'in_stock' || status === 'instock') { + stockStatus = 'in_stock'; + } else if (status === 'out_of_stock' || status === 'outofstock') { + stockStatus = 'out_of_stock'; + } + } + + return { + name: data.name || null, + price, + imageUrl: data.imageUrl || data.image || null, + stockStatus, + confidence: data.confidence || 0.5, + }; + } catch (error) { + console.error('Failed to parse AI response:', responseText); + return { + name: null, + price: null, + imageUrl: null, + stockStatus: 'unknown', + confidence: 0, + }; + } +} + +export async function extractWithAI( + url: string, + settings: AISettings +): Promise { + // Fetch the page HTML + const response = await axios.get(url, { + headers: { + 'User-Agent': + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + Accept: + 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', + }, + timeout: 20000, + }); + + const html = response.data; + + // Use the configured provider + if (settings.ai_provider === 'anthropic' && settings.anthropic_api_key) { + return extractWithAnthropic(html, settings.anthropic_api_key); + } else if (settings.ai_provider === 'openai' && settings.openai_api_key) { + return extractWithOpenAI(html, settings.openai_api_key); + } + + throw new Error('No valid AI provider configured'); +} + +// Export for use in scraper as fallback +export async function tryAIExtraction( + url: string, + html: string, + userId: number +): Promise { + try { + // Import dynamically to avoid circular dependencies + const { userQueries } = await import('../models'); + const settings = await userQueries.getAISettings(userId); + + if (!settings?.ai_enabled) { + return null; + } + + // Use the configured provider + if (settings.ai_provider === 'anthropic' && settings.anthropic_api_key) { + console.log(`[AI] Using Anthropic for ${url}`); + return await extractWithAnthropic(html, settings.anthropic_api_key); + } else if (settings.ai_provider === 'openai' && settings.openai_api_key) { + console.log(`[AI] Using OpenAI for ${url}`); + return await extractWithOpenAI(html, settings.openai_api_key); + } + + return null; + } catch (error) { + console.error(`[AI] Extraction failed for ${url}:`, error); + return null; + } +} diff --git a/backend/src/services/scheduler.ts b/backend/src/services/scheduler.ts index fe8c7ef..152734f 100644 --- a/backend/src/services/scheduler.ts +++ b/backend/src/services/scheduler.ts @@ -23,7 +23,7 @@ async function checkPrices(): Promise { try { console.log(`Checking price for product ${product.id}: ${product.url}`); - const scrapedData = await scrapeProduct(product.url); + const scrapedData = await scrapeProduct(product.url, product.user_id); // Check for back-in-stock notification const wasOutOfStock = product.stock_status === 'out_of_stock'; diff --git a/backend/src/services/scraper.ts b/backend/src/services/scraper.ts index 5b7f655..1481254 100644 --- a/backend/src/services/scraper.ts +++ b/backend/src/services/scraper.ts @@ -664,7 +664,7 @@ const genericImageSelectors = [ 'img[class*="product"]', ]; -export async function scrapeProduct(url: string): Promise { +export async function scrapeProduct(url: string, userId?: number): Promise { const result: ScrapedProduct = { name: null, price: null, @@ -673,8 +673,9 @@ export async function scrapeProduct(url: string): Promise { stockStatus: 'unknown', }; + let html: string = ''; + try { - let html: string; let usedBrowser = false; try { @@ -766,6 +767,26 @@ export async function scrapeProduct(url: string): Promise { if (!result.imageUrl) { result.imageUrl = $('meta[property="og:image"]').attr('content') || null; } + + // If we still don't have a price and userId is provided, try AI extraction + if (!result.price && userId && html) { + try { + const { tryAIExtraction } = await import('./ai-extractor'); + const aiResult = await tryAIExtraction(url, html, userId); + + if (aiResult && aiResult.price && aiResult.confidence > 0.5) { + console.log(`[AI] Successfully extracted price for ${url}: ${aiResult.price.price} (confidence: ${aiResult.confidence})`); + result.price = aiResult.price; + if (!result.name && aiResult.name) result.name = aiResult.name; + if (!result.imageUrl && aiResult.imageUrl) result.imageUrl = aiResult.imageUrl; + if (result.stockStatus === 'unknown' && aiResult.stockStatus !== 'unknown') { + result.stockStatus = aiResult.stockStatus; + } + } + } catch (aiError) { + console.error(`[AI] Extraction failed for ${url}:`, aiError); + } + } } catch (error) { console.error(`Error scraping ${url}:`, error); } diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index f39f849..e401313 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -143,8 +143,39 @@ export const settingsApi = { testDiscord: () => api.post<{ message: string }>('/settings/notifications/test/discord'), + + // AI Settings + getAI: () => + api.get('/settings/ai'), + + updateAI: (data: { + ai_enabled?: boolean; + ai_provider?: 'anthropic' | 'openai' | null; + anthropic_api_key?: string | null; + openai_api_key?: string | null; + }) => api.put('/settings/ai', data), + + testAI: (url: string) => + api.post('/settings/ai/test', { url }), }; +// AI Settings types +export interface AISettings { + ai_enabled: boolean; + ai_provider: 'anthropic' | 'openai' | null; + anthropic_configured: boolean; + openai_configured: boolean; +} + +export interface AITestResult { + success: boolean; + name: string | null; + price: { price: number; currency: string } | null; + imageUrl: string | null; + stockStatus: string; + confidence: number; +} + // Profile API export interface UserProfile { id: number; diff --git a/frontend/src/pages/Settings.tsx b/frontend/src/pages/Settings.tsx index 12ef305..f58b07f 100644 --- a/frontend/src/pages/Settings.tsx +++ b/frontend/src/pages/Settings.tsx @@ -6,11 +6,12 @@ import { profileApi, adminApi, NotificationSettings, + AISettings, UserProfile, SystemSettings, } from '../api/client'; -type SettingsSection = 'profile' | 'notifications' | 'admin'; +type SettingsSection = 'profile' | 'notifications' | 'ai' | 'admin'; export default function Settings() { const [activeSection, setActiveSection] = useState('profile'); @@ -35,6 +36,16 @@ export default function Settings() { const [isSavingNotifications, setIsSavingNotifications] = useState(false); const [isTesting, setIsTesting] = useState<'telegram' | 'discord' | null>(null); + // AI state + const [aiSettings, setAISettings] = useState(null); + const [aiEnabled, setAIEnabled] = useState(false); + const [aiProvider, setAIProvider] = useState<'anthropic' | 'openai'>('anthropic'); + const [anthropicApiKey, setAnthropicApiKey] = useState(''); + const [openaiApiKey, setOpenaiApiKey] = useState(''); + const [isSavingAI, setIsSavingAI] = useState(false); + const [isTestingAI, setIsTestingAI] = useState(false); + const [testUrl, setTestUrl] = useState(''); + // Admin state const [users, setUsers] = useState([]); const [systemSettings, setSystemSettings] = useState(null); @@ -52,9 +63,10 @@ export default function Settings() { const fetchInitialData = async () => { try { - const [profileRes, notificationsRes] = await Promise.all([ + const [profileRes, notificationsRes, aiRes] = await Promise.all([ profileApi.get(), settingsApi.getNotifications(), + settingsApi.getAI(), ]); setProfile(profileRes.data); setProfileName(profileRes.data.name || ''); @@ -62,6 +74,11 @@ export default function Settings() { if (notificationsRes.data.telegram_chat_id) { setTelegramChatId(notificationsRes.data.telegram_chat_id); } + setAISettings(aiRes.data); + setAIEnabled(aiRes.data.ai_enabled); + if (aiRes.data.ai_provider) { + setAIProvider(aiRes.data.ai_provider); + } } catch { setError('Failed to load settings'); } finally { @@ -199,6 +216,53 @@ export default function Settings() { } }; + // AI handlers + const handleSaveAI = async () => { + clearMessages(); + setIsSavingAI(true); + try { + const response = await settingsApi.updateAI({ + ai_enabled: aiEnabled, + ai_provider: aiProvider, + anthropic_api_key: anthropicApiKey || undefined, + openai_api_key: openaiApiKey || undefined, + }); + setAISettings(response.data); + setAnthropicApiKey(''); + setOpenaiApiKey(''); + setSuccess('AI settings saved successfully'); + } catch { + setError('Failed to save AI settings'); + } finally { + setIsSavingAI(false); + } + }; + + const handleTestAI = async () => { + clearMessages(); + if (!testUrl) { + setError('Please enter a URL to test'); + return; + } + setIsTestingAI(true); + try { + const response = await settingsApi.testAI(testUrl); + if (response.data.success && response.data.price) { + setSuccess( + `AI extraction successful! Found: ${response.data.name || 'Unknown'} - ` + + `${response.data.price.currency} ${response.data.price.price.toFixed(2)} ` + + `(confidence: ${(response.data.confidence * 100).toFixed(0)}%)` + ); + } else { + setError('AI could not extract price from this URL'); + } + } catch { + setError('Failed to test AI extraction'); + } finally { + setIsTestingAI(false); + } + }; + // Admin handlers const handleToggleRegistration = async () => { clearMessages(); @@ -660,6 +724,17 @@ export default function Settings() { Notifications + {profile?.is_admin && ( + + + + {aiSettings?.ai_enabled && (aiSettings.anthropic_configured || aiSettings.openai_configured) && ( +
+
+ 🧪 +

Test AI Extraction

+
+

+ Test AI extraction on a product URL to see if it can successfully extract the price. +

+ +
+ + setTestUrl(e.target.value)} + placeholder="https://example.com/product" + /> +
+ +
+ +
+
+ )} + + )} + {activeSection === 'admin' && profile?.is_admin && ( <>