mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-04-25 00:36:32 +02:00
Add AI-powered price extraction fallback
- Add AI extraction service supporting Anthropic (Claude) and OpenAI
- Add AI settings UI in Settings page with provider selection
- Add database migration for AI settings columns
- Integrate AI fallback into scraper when standard methods fail
- Add API endpoints for AI settings and test extraction

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
cfca33b4ea
commit
d98138fe7c
11 changed files with 887 additions and 10 deletions
189
backend/package-lock.json
generated
189
backend/package-lock.json
generated
|
|
@ -8,6 +8,7 @@
|
|||
"name": "priceghost-backend",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.24.0",
|
||||
"axios": "^1.6.0",
|
||||
"bcrypt": "^5.1.1",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
|
|
@ -16,6 +17,7 @@
|
|||
"express": "^4.18.2",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"node-cron": "^3.0.3",
|
||||
"openai": "^4.47.0",
|
||||
"pg": "^8.11.3",
|
||||
"puppeteer": "^22.0.0",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
|
|
@ -33,6 +35,37 @@
|
|||
"typescript": "^5.3.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.24.3",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.24.3.tgz",
|
||||
"integrity": "sha512-916wJXO6T6k8R6BAAcLhLPv/pnLGy7YSEBZXZ1XTFbLcTZE8oTy3oDW9WJf9KKZwMvVcePIfoTSvzXHRcGxkQQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "^18.11.18",
|
||||
"@types/node-fetch": "^2.6.4",
|
||||
"abort-controller": "^3.0.0",
|
||||
"agentkeepalive": "^4.2.1",
|
||||
"form-data-encoder": "1.7.2",
|
||||
"formdata-node": "^4.3.2",
|
||||
"node-fetch": "^2.6.7",
|
||||
"web-streams-polyfill": "^3.2.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk/node_modules/@types/node": {
|
||||
"version": "18.19.130",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
|
||||
"integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk/node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.28.6.tgz",
|
||||
|
|
@ -680,7 +713,6 @@
|
|||
"version": "20.19.30",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.30.tgz",
|
||||
"integrity": "sha512-WJtwWJu7UdlvzEAUm484QNg5eAoq5QR08KDNx7g45Usrs2NtOPiX8ugDqmKdXkyL03rBqU5dYNYVQetEpBHq2g==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
|
|
@ -693,6 +725,16 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node-fetch": {
|
||||
"version": "2.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/node": "*",
|
||||
"form-data": "^4.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pg": {
|
||||
"version": "8.16.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.16.0.tgz",
|
||||
|
|
@ -768,6 +810,18 @@
|
|||
"integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/abort-controller": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
|
||||
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"event-target-shim": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.5"
|
||||
}
|
||||
},
|
||||
"node_modules/accepts": {
|
||||
"version": "1.3.8",
|
||||
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
|
||||
|
|
@ -816,6 +870,18 @@
|
|||
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/agentkeepalive": {
|
||||
"version": "4.6.0",
|
||||
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz",
|
||||
"integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"humanize-ms": "^1.2.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ansi-regex": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
|
||||
|
|
@ -1867,6 +1933,15 @@
|
|||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/event-target-shim": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
|
||||
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/events-universal": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz",
|
||||
|
|
@ -2055,6 +2130,34 @@
|
|||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/form-data-encoder": {
|
||||
"version": "1.7.2",
|
||||
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
|
||||
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/formdata-node": {
|
||||
"version": "4.4.1",
|
||||
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
|
||||
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-domexception": "1.0.0",
|
||||
"web-streams-polyfill": "4.0.0-beta.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 12.20"
|
||||
}
|
||||
},
|
||||
"node_modules/formdata-node/node_modules/web-streams-polyfill": {
|
||||
"version": "4.0.0-beta.3",
|
||||
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
|
||||
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
}
|
||||
},
|
||||
"node_modules/forwarded": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
|
||||
|
|
@ -2489,6 +2592,15 @@
|
|||
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/humanize-ms": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
|
||||
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ms": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/iconv-lite": {
|
||||
"version": "0.6.3",
|
||||
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
|
||||
|
|
@ -3021,6 +3133,26 @@
|
|||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/node-domexception": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
|
||||
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
|
||||
"deprecated": "Use your platform's native DOMException instead",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/jimmywarting"
|
||||
},
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://paypal.me/jimmywarting"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||
|
|
@ -3123,6 +3255,51 @@
|
|||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/openai": {
|
||||
"version": "4.104.0",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz",
|
||||
"integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@types/node": "^18.11.18",
|
||||
"@types/node-fetch": "^2.6.4",
|
||||
"abort-controller": "^3.0.0",
|
||||
"agentkeepalive": "^4.2.1",
|
||||
"form-data-encoder": "1.7.2",
|
||||
"formdata-node": "^4.3.2",
|
||||
"node-fetch": "^2.6.7"
|
||||
},
|
||||
"bin": {
|
||||
"openai": "bin/cli"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"ws": "^8.18.0",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"ws": {
|
||||
"optional": true
|
||||
},
|
||||
"zod": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/openai/node_modules/@types/node": {
|
||||
"version": "18.19.130",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
|
||||
"integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/openai/node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/pac-proxy-agent": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||
|
|
@ -4466,7 +4643,6 @@
|
|||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/universalify": {
|
||||
|
|
@ -4526,6 +4702,15 @@
|
|||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/web-streams-polyfill": {
|
||||
"version": "3.3.3",
|
||||
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
|
||||
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/webidl-conversions": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
"db:init": "tsx src/config/init-db.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.24.0",
|
||||
"axios": "^1.6.0",
|
||||
"bcrypt": "^5.1.1",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
|
|
@ -18,6 +19,7 @@
|
|||
"express": "^4.18.2",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"node-cron": "^3.0.3",
|
||||
"openai": "^4.47.0",
|
||||
"pg": "^8.11.3",
|
||||
"puppeteer": "^22.0.0",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
|
|
|
|||
|
|
@ -9,6 +9,37 @@ import settingsRoutes from './routes/settings';
|
|||
import profileRoutes from './routes/profile';
|
||||
import adminRoutes from './routes/admin';
|
||||
import { startScheduler } from './services/scheduler';
|
||||
import pool from './config/database';
|
||||
|
||||
// Run database migrations
|
||||
/**
 * Applies the idempotent startup migration for the AI settings feature.
 *
 * Adds `ai_enabled`, `ai_provider`, `anthropic_api_key` and `openai_api_key`
 * to the `users` table when they are missing. Any failure (including failing
 * to obtain a connection) is logged and swallowed so the caller can continue
 * starting up; the function never rejects.
 */
async function runMigrations(): Promise<void> {
  try {
    // Acquire the client inside the try so a connection failure is logged
    // like any other migration error instead of escaping as an unhandled
    // rejection from the app.listen callback.
    const client = await pool.connect();
    try {
      // ADD COLUMN IF NOT EXISTS targets exactly the table the ALTER resolves
      // to; an information_schema lookup by table name alone could be fooled
      // by a same-named table in another schema.
      await client.query(`
        ALTER TABLE users
          ADD COLUMN IF NOT EXISTS ai_enabled BOOLEAN DEFAULT false,
          ADD COLUMN IF NOT EXISTS ai_provider VARCHAR(20),
          ADD COLUMN IF NOT EXISTS anthropic_api_key TEXT,
          ADD COLUMN IF NOT EXISTS openai_api_key TEXT;
      `);
      console.log('Database migrations completed');
    } finally {
      // Always hand the client back to the pool, even if the query failed.
      client.release();
    }
  } catch (error) {
    console.error('Migration error:', error);
  }
}
|
||||
|
||||
// Load environment variables
|
||||
dotenv.config();
|
||||
|
|
@ -47,9 +78,12 @@ app.use(
|
|||
);
|
||||
|
||||
// Start server
|
||||
app.listen(PORT, () => {
|
||||
app.listen(PORT, async () => {
|
||||
console.log(`PriceGhost API server running on port ${PORT}`);
|
||||
|
||||
// Run database migrations
|
||||
await runMigrations();
|
||||
|
||||
// Start the background price checker
|
||||
if (process.env.NODE_ENV !== 'test') {
|
||||
startScheduler();
|
||||
|
|
|
|||
|
|
@ -27,6 +27,13 @@ export interface NotificationSettings {
|
|||
discord_webhook_url: string | null;
|
||||
}
|
||||
|
||||
/**
 * Per-user configuration for the AI price-extraction fallback, mirroring the
 * `ai_*` / `*_api_key` columns selected from the `users` table.
 */
export interface AISettings {
  /** Whether the AI fallback is switched on for this user. */
  ai_enabled: boolean;

  /** Which provider to call; `null` when none has been chosen. */
  ai_provider: 'anthropic' | 'openai' | null;

  /** Stored Anthropic API key, or `null` when not configured. */
  anthropic_api_key: string | null;

  /** Stored OpenAI API key, or `null` when not configured. */
  openai_api_key: string | null;
}
|
||||
|
||||
export const userQueries = {
|
||||
findByEmail: async (email: string): Promise<User | null> => {
|
||||
const result = await pool.query(
|
||||
|
|
@ -155,6 +162,50 @@ export const userQueries = {
|
|||
);
|
||||
return (result.rowCount ?? 0) > 0;
|
||||
},
|
||||
|
||||
// Load the AI settings columns for one user; resolves to null when no row
// matches the given id.
getAISettings: async (id: number): Promise<AISettings | null> => {
  const { rows } = await pool.query(
    'SELECT ai_enabled, ai_provider, anthropic_api_key, openai_api_key FROM users WHERE id = $1',
    [id]
  );
  const [firstRow] = rows;
  return firstRow || null;
},
|
||||
|
||||
// Partially update a user's AI settings. Only keys that are not `undefined`
// in `settings` are written. Resolves to the updated settings row, or null
// when there was nothing to update or no row matched the id.
updateAISettings: async (
  id: number,
  settings: Partial<AISettings>
): Promise<AISettings | null> => {
  // Fixed list of updatable columns, in the order they appear in the SET clause.
  const columns = [
    'ai_enabled',
    'ai_provider',
    'anthropic_api_key',
    'openai_api_key',
  ] as const;

  const assignments: string[] = [];
  const params: (string | boolean | null)[] = [];

  for (const column of columns) {
    const value = settings[column];
    if (value === undefined) continue;
    params.push(value);
    // The placeholder number is the 1-based position of the value just pushed.
    assignments.push(`${column} = $${params.length}`);
  }

  if (assignments.length === 0) return null;

  // The user id is always the last bound parameter.
  params.push(id.toString());
  const { rows } = await pool.query(
    `UPDATE users SET ${assignments.join(', ')} WHERE id = $${params.length}
RETURNING ai_enabled, ai_provider, anthropic_api_key, openai_api_key`,
    params
  );
  return rows[0] || null;
},
|
||||
};
|
||||
|
||||
// System settings queries
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ router.post('/', async (req: AuthRequest, res: Response) => {
|
|||
return;
|
||||
}
|
||||
|
||||
// Scrape product info
|
||||
const scrapedData = await scrapeProduct(url);
|
||||
// Scrape product info (pass userId for AI fallback)
|
||||
const scrapedData = await scrapeProduct(url, userId);
|
||||
|
||||
// Allow adding out-of-stock products, but require a price for in-stock ones
|
||||
if (!scrapedData.price && scrapedData.stockStatus !== 'out_of_stock') {
|
||||
|
|
|
|||
|
|
@ -127,4 +127,89 @@ router.post('/notifications/test/discord', async (req: AuthRequest, res: Respons
|
|||
}
|
||||
});
|
||||
|
||||
// Get AI settings
|
||||
/**
 * GET /ai — report the caller's AI settings.
 *
 * Responds 404 when no settings row exists for the user and 500 on lookup
 * failure. API keys are never returned; only booleans saying whether each
 * key is set.
 */
router.get('/ai', async (req: AuthRequest, res: Response) => {
  try {
    const aiSettings = await userQueries.getAISettings(req.userId!);

    if (!aiSettings) {
      res.status(404).json({ error: 'User not found' });
      return;
    }

    const { ai_enabled, ai_provider, anthropic_api_key, openai_api_key } = aiSettings;

    // Expose presence flags instead of the secret values themselves.
    res.json({
      ai_enabled: ai_enabled || false,
      ai_provider: ai_provider || null,
      anthropic_configured: Boolean(anthropic_api_key),
      openai_configured: Boolean(openai_api_key),
    });
  } catch (error) {
    console.error('Error fetching AI settings:', error);
    res.status(500).json({ error: 'Failed to fetch AI settings' });
  }
});
|
||||
|
||||
// Update AI settings
|
||||
/**
 * PUT /ai — update the caller's AI settings.
 *
 * Accepts any subset of `ai_enabled`, `ai_provider`, `anthropic_api_key` and
 * `openai_api_key` in the JSON body. The body is untrusted, so each field is
 * type-checked before it reaches the database:
 *   - `ai_enabled` must be a boolean,
 *   - `ai_provider` must be 'anthropic', 'openai', null or '' ('' is stored
 *     as null),
 *   - API keys must be strings or null.
 * Responds 400 on invalid input or when nothing was updated, 500 on failure.
 * The response never contains the keys, only whether each one is set.
 */
router.put('/ai', async (req: AuthRequest, res: Response) => {
  try {
    const userId = req.userId!;
    const { ai_enabled, ai_provider, anthropic_api_key, openai_api_key } = req.body ?? {};

    if (ai_enabled !== undefined && typeof ai_enabled !== 'boolean') {
      res.status(400).json({ error: 'ai_enabled must be a boolean' });
      return;
    }

    // Treat an empty selection as "no provider" rather than storing ''.
    const provider = ai_provider === '' ? null : ai_provider;
    if (
      provider !== undefined &&
      provider !== null &&
      provider !== 'anthropic' &&
      provider !== 'openai'
    ) {
      res.status(400).json({ error: 'ai_provider must be "anthropic" or "openai"' });
      return;
    }

    const isOptionalKey = (value: unknown): value is string | null | undefined =>
      value === undefined || value === null || typeof value === 'string';
    if (!isOptionalKey(anthropic_api_key) || !isOptionalKey(openai_api_key)) {
      res.status(400).json({ error: 'API keys must be strings' });
      return;
    }

    const settings = await userQueries.updateAISettings(userId, {
      ai_enabled,
      ai_provider: provider,
      anthropic_api_key,
      openai_api_key,
    });

    if (!settings) {
      res.status(400).json({ error: 'No settings to update' });
      return;
    }

    res.json({
      ai_enabled: settings.ai_enabled || false,
      ai_provider: settings.ai_provider || null,
      anthropic_configured: !!settings.anthropic_api_key,
      openai_configured: !!settings.openai_api_key,
      message: 'AI settings updated successfully',
    });
  } catch (error) {
    console.error('Error updating AI settings:', error);
    res.status(500).json({ error: 'Failed to update AI settings' });
  }
});
|
||||
|
||||
// Test AI extraction
|
||||
/**
 * POST /ai/test — run AI extraction against a URL using the caller's stored
 * AI settings and return the result.
 *
 * Body: `{ url }`. Responds 400 when the URL is missing, malformed or not
 * http(s), or when AI extraction is not enabled for the user; 500 when the
 * extraction itself fails. On success the extraction result is returned with
 * a `success` flag that is true when a price was found.
 */
router.post('/ai/test', async (req: AuthRequest, res: Response) => {
  try {
    const userId = req.userId!;
    const { url } = req.body ?? {};

    if (!url) {
      res.status(400).json({ error: 'URL is required' });
      return;
    }

    // The URL is fetched server-side, so reject anything that is not a
    // well-formed http(s) URL before it reaches the HTTP client.
    let parsedUrl: URL | null = null;
    if (typeof url === 'string') {
      try {
        parsedUrl = new URL(url);
      } catch {
        parsedUrl = null;
      }
    }
    if (!parsedUrl || (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:')) {
      res.status(400).json({ error: 'URL must be a valid http(s) URL' });
      return;
    }
    // NOTE(review): this does not stop requests to internal/private hosts
    // (SSRF). Consider a host allow/deny check if this server can reach
    // sensitive internal services.

    const settings = await userQueries.getAISettings(userId);
    if (!settings?.ai_enabled) {
      res.status(400).json({ error: 'AI extraction is not enabled' });
      return;
    }

    // Loaded lazily, as in the scraper fallback path.
    const { extractWithAI } = await import('../services/ai-extractor');
    const result = await extractWithAI(url, settings);

    res.json({
      success: !!result.price,
      ...result,
    });
  } catch (error) {
    console.error('Error testing AI extraction:', error);
    res.status(500).json({ error: 'Failed to test AI extraction' });
  }
});
|
||||
|
||||
export default router;
|
||||
|
|
|
|||
258
backend/src/services/ai-extractor.ts
Normal file
258
backend/src/services/ai-extractor.ts
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
import Anthropic from '@anthropic-ai/sdk';
|
||||
import OpenAI from 'openai';
|
||||
import axios from 'axios';
|
||||
import { load } from 'cheerio';
|
||||
import { AISettings } from '../models';
|
||||
import { ParsedPrice } from '../utils/priceParser';
|
||||
import { StockStatus } from './scraper';
|
||||
|
||||
/** Normalised product data produced by an AI provider for a single page. */
export interface AIExtractionResult {
  /** Product name/title, or `null` when none was extracted. */
  name: string | null;

  /** Parsed price and currency, or `null` when no usable price was found. */
  price: ParsedPrice | null;

  /** Main product image URL, or `null` when none was extracted. */
  imageUrl: string | null;

  /** Availability reported by the model. */
  stockStatus: StockStatus;

  /** Confidence reported by the model; the prompt asks for a value from 0 to 1. */
  confidence: number;
}
|
||||
|
||||
const EXTRACTION_PROMPT = `You are a price extraction assistant. Analyze the following HTML content from a product page and extract the product information.
|
||||
|
||||
Return a JSON object with these fields:
|
||||
- name: The product name/title (string or null)
|
||||
- price: The current selling price as a number (not the original/crossed-out price)
|
||||
- currency: The currency code (USD, EUR, GBP, etc.)
|
||||
- imageUrl: The main product image URL (string or null)
|
||||
- stockStatus: One of "in_stock", "out_of_stock", or "unknown"
|
||||
- confidence: Your confidence in the extraction from 0 to 1
|
||||
|
||||
Important:
|
||||
- Extract the CURRENT/SALE price, not the original price if there's a discount
|
||||
- If you can't find a price with confidence, set price to null
|
||||
- Only return valid JSON, no explanation text
|
||||
|
||||
HTML Content:
|
||||
`;
|
||||
|
||||
// Truncate HTML to fit within token limits while preserving important content
|
||||
/**
 * Reduces a page's HTML to a compact string suitable for an LLM prompt.
 *
 * Steps:
 *  1. Collect JSON-LD blocks that mention "price".
 *  2. Strip non-content elements (scripts, styles, iframes, ...).
 *  3. Prefer the first product-looking section with more than 500 characters
 *     of markup; otherwise fall back to the body, then to the raw input.
 *  4. Prepend the JSON-LD data (when any) and truncate to ~15000 characters.
 *
 * @param html Raw page HTML.
 * @returns The prepared, possibly truncated, content.
 */
function prepareHtmlForAI(html: string): string {
  const MAX_CHARS = 15000;
  const MIN_SECTION_CHARS = 500;

  const $ = load(html);

  // JSON-LD lives in <script> tags, so it must be collected BEFORE scripts
  // are stripped below; otherwise this lookup can never match anything.
  const jsonLdScripts: string[] = [];
  $('script[type="application/ld+json"]').each((_, el) => {
    const scriptContent = $(el).html();
    if (scriptContent && scriptContent.includes('price')) {
      jsonLdScripts.push(scriptContent);
    }
  });

  // Remove script, style, and other non-content elements
  $('script, style, noscript, iframe, svg, path, meta, link, comment').remove();

  // Default to the whole body (or the raw input if there is no body markup)
  let content = $('body').html() || html;

  // Try to focus on product-related sections if possible
  const productSelectors = [
    '[itemtype*="Product"]',
    '[class*="product"]',
    '[id*="product"]',
    '[class*="pdp"]',
    'main',
    '[role="main"]',
  ];

  for (const selector of productSelectors) {
    // html() yields null for an empty selection, so one call covers both the
    // "no match" and "empty markup" cases.
    const sectionHtml = $(selector).first().html();
    if (sectionHtml && sectionHtml.length > MIN_SECTION_CHARS) {
      content = sectionHtml;
      break;
    }
  }

  // Combine content with JSON-LD data
  let finalContent = content;
  if (jsonLdScripts.length > 0) {
    finalContent = `JSON-LD Data:\n${jsonLdScripts.join('\n')}\n\nHTML Content:\n${content}`;
  }

  // Truncate to stay within token limits
  if (finalContent.length > MAX_CHARS) {
    finalContent = finalContent.substring(0, MAX_CHARS) + '\n... [truncated]';
  }

  return finalContent;
}
|
||||
|
||||
/**
 * Asks Anthropic's Messages API to extract product data from page HTML.
 *
 * @param html   Raw page HTML; it is condensed with `prepareHtmlForAI` first.
 * @param apiKey Anthropic API key to authenticate with.
 * @returns The parsed extraction result.
 * @throws Error when the response has no leading text block; SDK/network
 *         errors propagate unchanged.
 */
async function extractWithAnthropic(
  html: string,
  apiKey: string
): Promise<AIExtractionResult> {
  const anthropic = new Anthropic({ apiKey });

  const preparedHtml = prepareHtmlForAI(html);

  const response = await anthropic.messages.create({
    model: 'claude-3-haiku-20240307',
    max_tokens: 1024,
    messages: [
      {
        role: 'user',
        content: EXTRACTION_PROMPT + preparedHtml,
      },
    ],
  });

  // The content array may be empty; guard before reading `.type` so the
  // caller gets the intended error rather than a TypeError.
  const firstBlock = response.content[0];
  if (!firstBlock || firstBlock.type !== 'text') {
    throw new Error('Unexpected response type from Anthropic');
  }

  return parseAIResponse(firstBlock.text);
}
|
||||
|
||||
/**
 * Asks OpenAI's chat completions API to extract product data from page HTML.
 *
 * @param html   Raw page HTML; it is condensed with `prepareHtmlForAI` first.
 * @param apiKey OpenAI API key to authenticate with.
 * @returns The parsed extraction result.
 * @throws Error when the first choice carries no message content; SDK/network
 *         errors propagate unchanged.
 */
async function extractWithOpenAI(
  html: string,
  apiKey: string
): Promise<AIExtractionResult> {
  const client = new OpenAI({ apiKey });
  const prompt = EXTRACTION_PROMPT + prepareHtmlForAI(html);

  const completion = await client.chat.completions.create({
    model: 'gpt-4o-mini',
    max_tokens: 1024,
    messages: [{ role: 'user', content: prompt }],
  });

  const reply = completion.choices[0]?.message?.content;
  if (reply) {
    return parseAIResponse(reply);
  }

  throw new Error('No response from OpenAI');
}
|
||||
|
||||
/**
 * Converts a model's raw text reply into an `AIExtractionResult`.
 *
 * The reply may be bare JSON, JSON inside a markdown code fence, or JSON
 * surrounded by stray text; the outermost `{...}` span is parsed. Each field
 * is validated independently so one malformed field cannot discard the rest.
 *
 * Field handling:
 *  - price: a number, or a string stripped of everything but digits and '.';
 *    kept only when finite and greater than 0. Currency defaults to 'USD'.
 *  - stockStatus: normalised to 'in_stock' / 'out_of_stock', else 'unknown'.
 *  - confidence: clamped to [0, 1]; a reported 0 stays 0. Missing or
 *    non-numeric values default to 0.5.
 *
 * @param responseText Raw text returned by the AI provider.
 * @returns The parsed result. When the reply is not a JSON object, every
 *          field is null/'unknown' and confidence is 0; this never throws.
 */
function parseAIResponse(responseText: string): AIExtractionResult {
  // Try to extract JSON from the response
  let jsonStr = responseText.trim();

  // Handle markdown code blocks
  const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (jsonMatch) {
    jsonStr = jsonMatch[1].trim();
  }

  // Try to find JSON object in the response
  const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
  if (objectMatch) {
    jsonStr = objectMatch[0];
  }

  let data: Record<string, unknown>;
  try {
    const parsed: unknown = JSON.parse(jsonStr);
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      throw new Error('AI response is not a JSON object');
    }
    data = parsed as Record<string, unknown>;
  } catch (error) {
    console.error('Failed to parse AI response:', responseText);
    return {
      name: null,
      price: null,
      imageUrl: null,
      stockStatus: 'unknown',
      confidence: 0,
    };
  }

  const asNonEmptyString = (value: unknown): string | null =>
    typeof value === 'string' && value !== '' ? value : null;

  // Price: accept numbers and numeric-looking strings only.
  let price: ParsedPrice | null = null;
  const rawPrice = data.price;
  let priceNum = NaN;
  if (typeof rawPrice === 'number') {
    priceNum = rawPrice;
  } else if (typeof rawPrice === 'string') {
    priceNum = parseFloat(rawPrice.replace(/[^0-9.]/g, ''));
  }
  if (Number.isFinite(priceNum) && priceNum > 0) {
    price = {
      price: priceNum,
      currency: asNonEmptyString(data.currency) ?? 'USD',
    };
  }

  // Stock status: only strings can be normalised; anything else is unknown.
  let stockStatus: StockStatus = 'unknown';
  if (typeof data.stockStatus === 'string') {
    const status = data.stockStatus.toLowerCase().replace(/[^a-z_]/g, '');
    if (status === 'in_stock' || status === 'instock') {
      stockStatus = 'in_stock';
    } else if (status === 'out_of_stock' || status === 'outofstock') {
      stockStatus = 'out_of_stock';
    }
  }

  // Confidence: keep an explicit 0 and clamp out-of-range values.
  const rawConfidence =
    typeof data.confidence === 'string' ? parseFloat(data.confidence) : data.confidence;
  const confidence =
    typeof rawConfidence === 'number' && Number.isFinite(rawConfidence)
      ? Math.min(1, Math.max(0, rawConfidence))
      : 0.5;

  return {
    name: asNonEmptyString(data.name),
    price,
    imageUrl: asNonEmptyString(data.imageUrl) ?? asNonEmptyString(data.image),
    stockStatus,
    confidence,
  };
}
|
||||
|
||||
/**
 * Fetches a product page and extracts product data with the configured AI
 * provider.
 *
 * The provider configuration is validated BEFORE the page is downloaded, so
 * a misconfigured account fails fast without a network request.
 *
 * @param url      Page to fetch.
 * @param settings The user's AI settings; `ai_provider` selects the backend
 *                 and the matching API key must be set.
 * @returns The extraction result from the selected provider.
 * @throws Error 'No valid AI provider configured' when no provider/key pair
 *         is usable; HTTP and provider errors propagate unchanged.
 */
export async function extractWithAI(
  url: string,
  settings: AISettings
): Promise<AIExtractionResult> {
  // Resolve the provider first so configuration errors are not hidden
  // behind (or delayed by) the page download.
  let extract: (html: string) => Promise<AIExtractionResult>;
  if (settings.ai_provider === 'anthropic' && settings.anthropic_api_key) {
    const apiKey = settings.anthropic_api_key;
    extract = (html) => extractWithAnthropic(html, apiKey);
  } else if (settings.ai_provider === 'openai' && settings.openai_api_key) {
    const apiKey = settings.openai_api_key;
    extract = (html) => extractWithOpenAI(html, apiKey);
  } else {
    throw new Error('No valid AI provider configured');
  }

  // Fetch the page HTML with browser-like headers
  const response = await axios.get<string>(url, {
    headers: {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
      Accept:
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    },
    timeout: 20000,
  });

  return extract(response.data);
}
|
||||
|
||||
// Export for use in scraper as fallback
|
||||
/**
 * Best-effort AI extraction on HTML that has already been fetched.
 *
 * Looks up the user's AI settings and, when AI is enabled and the selected
 * provider has a key, runs the extraction. Never throws: any failure is
 * logged and reported as `null`.
 *
 * @param url    Page URL, used for log messages only.
 * @param html   Page HTML to analyse.
 * @param userId Owner whose AI settings decide the provider.
 * @returns The extraction result, or `null` when AI is disabled, not
 *          configured, or the extraction failed.
 */
export async function tryAIExtraction(
  url: string,
  html: string,
  userId: number
): Promise<AIExtractionResult | null> {
  try {
    // Loaded lazily to avoid circular dependencies
    const models = await import('../models');
    const aiSettings = await models.userQueries.getAISettings(userId);

    if (!aiSettings?.ai_enabled) {
      return null;
    }

    const {
      ai_provider: provider,
      anthropic_api_key: anthropicKey,
      openai_api_key: openaiKey,
    } = aiSettings;

    if (provider === 'anthropic' && anthropicKey) {
      console.log(`[AI] Using Anthropic for ${url}`);
      // `await` here keeps provider rejections inside this try/catch.
      return await extractWithAnthropic(html, anthropicKey);
    }

    if (provider === 'openai' && openaiKey) {
      console.log(`[AI] Using OpenAI for ${url}`);
      return await extractWithOpenAI(html, openaiKey);
    }

    return null;
  } catch (failure) {
    console.error(`[AI] Extraction failed for ${url}:`, failure);
    return null;
  }
}
|
||||
|
|
@ -23,7 +23,7 @@ async function checkPrices(): Promise<void> {
|
|||
try {
|
||||
console.log(`Checking price for product ${product.id}: ${product.url}`);
|
||||
|
||||
const scrapedData = await scrapeProduct(product.url);
|
||||
const scrapedData = await scrapeProduct(product.url, product.user_id);
|
||||
|
||||
// Check for back-in-stock notification
|
||||
const wasOutOfStock = product.stock_status === 'out_of_stock';
|
||||
|
|
|
|||
|
|
@ -664,7 +664,7 @@ const genericImageSelectors = [
|
|||
'img[class*="product"]',
|
||||
];
|
||||
|
||||
export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
|
||||
export async function scrapeProduct(url: string, userId?: number): Promise<ScrapedProduct> {
|
||||
const result: ScrapedProduct = {
|
||||
name: null,
|
||||
price: null,
|
||||
|
|
@ -673,8 +673,9 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
|
|||
stockStatus: 'unknown',
|
||||
};
|
||||
|
||||
let html: string = '';
|
||||
|
||||
try {
|
||||
let html: string;
|
||||
let usedBrowser = false;
|
||||
|
||||
try {
|
||||
|
|
@ -766,6 +767,26 @@ export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
|
|||
if (!result.imageUrl) {
|
||||
result.imageUrl = $('meta[property="og:image"]').attr('content') || null;
|
||||
}
|
||||
|
||||
// If we still don't have a price and userId is provided, try AI extraction
|
||||
if (!result.price && userId && html) {
|
||||
try {
|
||||
const { tryAIExtraction } = await import('./ai-extractor');
|
||||
const aiResult = await tryAIExtraction(url, html, userId);
|
||||
|
||||
if (aiResult && aiResult.price && aiResult.confidence > 0.5) {
|
||||
console.log(`[AI] Successfully extracted price for ${url}: ${aiResult.price.price} (confidence: ${aiResult.confidence})`);
|
||||
result.price = aiResult.price;
|
||||
if (!result.name && aiResult.name) result.name = aiResult.name;
|
||||
if (!result.imageUrl && aiResult.imageUrl) result.imageUrl = aiResult.imageUrl;
|
||||
if (result.stockStatus === 'unknown' && aiResult.stockStatus !== 'unknown') {
|
||||
result.stockStatus = aiResult.stockStatus;
|
||||
}
|
||||
}
|
||||
} catch (aiError) {
|
||||
console.error(`[AI] Extraction failed for ${url}:`, aiError);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error scraping ${url}:`, error);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue