mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-05-10 00:02:40 +02:00
Initial commit: PriceGhost price tracking application
Full-stack application for tracking product prices:
- Backend: Node.js + Express + TypeScript
- Frontend: React + Vite + TypeScript
- Database: PostgreSQL
- Price scraping with Cheerio
- JWT authentication
- Background price checking with node-cron
- Price history charts with Recharts
- Docker support with docker-compose

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
commit
10660e5626
44 changed files with 3662 additions and 0 deletions
76
backend/src/services/scheduler.ts
Normal file
76
backend/src/services/scheduler.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
import cron from 'node-cron';
|
||||
import { productQueries, priceHistoryQueries } from '../models';
|
||||
import { scrapePrice } from './scraper';
|
||||
|
||||
// In-process guard: a new run is skipped while a previous one is still going.
let isRunning = false;

// Pause between products so target sites are not hit in rapid succession.
const REQUEST_DELAY_MS = 2000;

/**
 * Runs one price-check pass over every product that is due for a refresh.
 *
 * For each product the current price is scraped and compared with the latest
 * stored price; a new history row is written only when the price differs or
 * no history exists yet. `last_checked` is updated even when no price could
 * be extracted. A failure on one product is logged and does not stop the
 * pass. If a pass is already running, the call logs a message and returns.
 *
 * @returns A promise that resolves when the pass is finished; errors are
 *   logged rather than propagated.
 */
async function checkPrices(): Promise<void> {
  if (isRunning) {
    console.log('Price check already in progress, skipping...');
    return;
  }

  isRunning = true;
  console.log('Starting scheduled price check...');

  try {
    // Find all products that are due for a refresh
    const products = await productQueries.findDueForRefresh();
    console.log(`Found ${products.length} products to check`);

    for (const product of products) {
      try {
        console.log(`Checking price for product ${product.id}: ${product.url}`);

        const priceData = await scrapePrice(product.url);

        // A non-finite price (e.g. NaN from unparsable structured data) is
        // treated as "no price": NaN never equals anything, so it would
        // otherwise be recorded as a "change" on every run.
        if (priceData && Number.isFinite(priceData.price)) {
          // Get the latest recorded price to compare
          const latestPrice = await priceHistoryQueries.getLatest(product.id);

          // Coerce the stored value before comparing. NOTE(review): the model
          // layer is not visible here; if the price column is NUMERIC/DECIMAL,
          // node-postgres returns it as a string and a strict `!==` against
          // the scraped number would always report a change. Number() is a
          // no-op when the value is already a number.
          const priceChanged =
            !latestPrice || Number(latestPrice.price) !== priceData.price;

          // Only record if price has changed or it's the first entry
          if (priceChanged) {
            await priceHistoryQueries.create(
              product.id,
              priceData.price,
              priceData.currency
            );
            console.log(
              `Recorded new price for product ${product.id}: ${priceData.currency} ${priceData.price}`
            );
          } else {
            console.log(`Price unchanged for product ${product.id}`);
          }
        } else {
          console.warn(`Could not extract price for product ${product.id}`);
        }

        // Update last_checked even if price extraction failed
        await productQueries.updateLastChecked(product.id);
      } catch (error) {
        console.error(`Error checking product ${product.id}:`, error);
        // Continue with next product even if one fails
      } finally {
        // Throttle after every product, including failed ones, so an error
        // does not cause the next request to fire immediately.
        await new Promise((resolve) => setTimeout(resolve, REQUEST_DELAY_MS));
      }
    }
  } catch (error) {
    console.error('Error in scheduled price check:', error);
  } finally {
    // Always release the guard so the next scheduled run can proceed.
    isRunning = false;
    console.log('Scheduled price check complete');
  }
}
|
||||
|
||||
/**
 * Registers the recurring price-check job with node-cron.
 *
 * The job fires once per minute and calls `checkPrices`; a rejected run is
 * reported through `console.error`.
 */
export function startScheduler(): void {
  // Five-field cron expression: every minute.
  const everyMinute = '* * * * *';

  const runPriceCheck = (): void => {
    checkPrices().catch(console.error);
  };

  cron.schedule(everyMinute, runPriceCheck);

  console.log('Price check scheduler started (runs every minute)');
}
|
||||
|
||||
// Allow manual trigger for testing
|
||||
// checkPrices is declared without an inline `export`, so it is exported here by name.
export { checkPrices };
|
||||
267
backend/src/services/scraper.ts
Normal file
267
backend/src/services/scraper.ts
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import {
|
||||
parsePrice,
|
||||
ParsedPrice,
|
||||
findMostLikelyPrice,
|
||||
} from '../utils/priceParser';
|
||||
|
||||
/**
 * Result of scraping a single product page.
 *
 * Every extracted field is `null` when it could not be determined.
 */
export interface ScrapedProduct {
  /** Product title, or `null` if none was found. */
  name: string | null;
  /** Parsed price, or `null` if none was found. */
  price: ParsedPrice | null;
  /** Product image URL, or `null` if none was found. */
  imageUrl: string | null;
  /** The URL that was requested. */
  url: string;
}
|
||||
|
||||
// Structured-data attributes (schema.org microdata and data-* attributes).
const structuredPriceSelectors = [
  '[itemprop="price"]',
  '[data-price]',
  '[data-product-price]',
];

// Class names and ids commonly used for the displayed price.
const commonPriceSelectors = [
  '.price',
  '.product-price',
  '.current-price',
  '.sale-price',
  '.final-price',
  '.offer-price',
  '#price',
  '#priceblock_ourprice',
  '#priceblock_dealprice',
  '#priceblock_saleprice',
];

// Amazon-specific price markup.
const amazonPriceSelectors = [
  '.a-price .a-offscreen',
  '.a-price-whole',
  '#corePrice_feature_div .a-price .a-offscreen',
  '#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
];

// Catch-all substring matches on class and id.
const genericPriceSelectors = [
  '[class*="price"]',
  '[class*="Price"]',
  '[id*="price"]',
  '[id*="Price"]',
];

// Common price selectors used across e-commerce sites, most specific first.
// Order matters: price extraction stops at the first selector that yields a
// parsable price.
const priceSelectors = [
  ...structuredPriceSelectors,
  ...commonPriceSelectors,
  ...amazonPriceSelectors,
  ...genericPriceSelectors,
];
|
||||
|
||||
// Selectors for the product name, tried in order; the first one that yields
// usable text wins.
const nameSelectors = [
  '[itemprop="name"]', // schema.org microdata
  'h1[class*="product"]', // headings whose class mentions "product"
  'h1[class*="title"]', // headings whose class mentions "title"
  '#productTitle',
  '.product-title',
  '.product-name',
  'h1', // last resort: any top-level heading
];
|
||||
|
||||
// Selectors for the product image, tried in order; the first element that
// carries a usable URL attribute wins.
const imageSelectors = [
  '[itemprop="image"]', // schema.org microdata
  '[property="og:image"]', // Open Graph meta tag
  '#landingImage',
  '#imgBlkFront',
  '.product-image img',
  '.main-image img',
  '[data-zoom-image]',
  'img[class*="product"]', // images whose class mentions "product"
];
|
||||
|
||||
/**
 * Fetches a product page and extracts its name, price and image.
 *
 * Extraction runs in stages: JSON-LD structured data first, then CSS-selector
 * heuristics for any field still missing, then Open Graph meta tags for the
 * name and image. Request or parsing errors are logged and whatever was
 * collected up to that point is returned.
 *
 * @param url - Address of the product page to fetch.
 * @returns The scraped fields; each one is `null` if it could not be found.
 */
export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
  const scraped: ScrapedProduct = {
    name: null,
    price: null,
    imageUrl: null,
    url,
  };

  // Browser-like request headers sent with the page request.
  const browserHeaders = {
    'User-Agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    Accept:
      'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    Connection: 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
  };

  try {
    const { data: html } = await axios.get(url, {
      headers: browserHeaders,
      timeout: 15000,
      maxRedirects: 5,
    });

    const $ = cheerio.load(html);

    // Stage 1: JSON-LD structured data.
    const structured = extractJsonLd($);
    if (structured) {
      scraped.name = structured.name || null;
      scraped.price = structured.price || null;
      scraped.imageUrl = structured.image || null;
    }

    // Stage 2: selector heuristics, only for fields still missing.
    scraped.name = scraped.name || extractName($);
    scraped.price = scraped.price || extractPrice($);
    scraped.imageUrl = scraped.imageUrl || extractImage($, url);

    // Stage 3: Open Graph meta tags as the last fallback.
    scraped.name =
      scraped.name || $('meta[property="og:title"]').attr('content') || null;
    scraped.imageUrl =
      scraped.imageUrl ||
      $('meta[property="og:image"]').attr('content') ||
      null;
  } catch (error) {
    console.error(`Error scraping ${url}:`, error);
  }

  return scraped;
}
|
||||
|
||||
type JsonLdFields = { name?: string; price?: ParsedPrice; image?: string };

/**
 * Extracts product fields from the page's JSON-LD structured data.
 *
 * Each `<script type="application/ld+json">` element is parsed in document
 * order, and the first one that contains a Product node is used. A script
 * with malformed JSON is skipped so it cannot hide valid data in a later one.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @returns The fields found on the first Product node (possibly empty), or
 *   `null` when no script contains a Product.
 */
function extractJsonLd(
  $: cheerio.CheerioAPI
): { name?: string; price?: ParsedPrice; image?: string } | null {
  const scripts = $('script[type="application/ld+json"]');

  for (let i = 0; i < scripts.length; i++) {
    try {
      const content = $(scripts[i]).html();
      if (!content) continue;

      const product = findProduct(JSON.parse(content));
      if (product) return readJsonLdProduct(product);
    } catch {
      // Malformed JSON (or an unexpected shape) in this script: try the next.
    }
  }

  return null;
}

/** Reads name, price and image from a JSON-LD Product node, narrowing each value. */
function readJsonLdProduct(product: Record<string, unknown>): JsonLdFields {
  const fields: JsonLdFields = {};

  if (typeof product.name === 'string' && product.name.trim()) {
    fields.name = product.name.trim();
  }

  // `offers` may be a single Offer/AggregateOffer or an array; use the first.
  const offers = product.offers;
  const offer: unknown = Array.isArray(offers) ? offers[0] : offers;
  if (offer && typeof offer === 'object') {
    const { price, lowPrice, priceCurrency } = offer as Record<string, unknown>;
    // AggregateOffer carries lowPrice/highPrice instead of price.
    const rawAmount = price || lowPrice;
    if (rawAmount) {
      const amount = parseFloat(String(rawAmount));
      // Reject NaN/Infinity so an unparsable value never becomes a "price".
      if (Number.isFinite(amount)) {
        fields.price = {
          price: amount,
          currency:
            typeof priceCurrency === 'string' && priceCurrency
              ? priceCurrency
              : 'USD',
        };
      }
    }
  }

  const image = jsonLdImageUrl(product.image);
  if (image) fields.image = image;

  return fields;
}

/** Resolves a JSON-LD `image` value (string, ImageObject, or array of either) to a URL string. */
function jsonLdImageUrl(image: unknown): string | undefined {
  const first: unknown = Array.isArray(image) ? image[0] : image;
  if (typeof first === 'string') return first || undefined;
  if (first && typeof first === 'object') {
    const url = (first as Record<string, unknown>).url;
    return typeof url === 'string' && url ? url : undefined;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Searches parsed JSON-LD for the first schema.org Product node.
 *
 * Handles a single node, a top-level array of nodes, and nodes listed under
 * an `@graph` array. A node counts as a Product when its `@type` is
 * `"Product"`, a full IRI ending in `/Product`, or an array containing one
 * of those.
 *
 * @param data - Any value produced by `JSON.parse`.
 * @returns The matching node, or `null` if none is found.
 */
function findProduct(data: unknown): Record<string, unknown> | null {
  if (!data || typeof data !== 'object') return null;

  // A top-level array holds independent nodes; search them in order.
  if (Array.isArray(data)) {
    for (const item of data) {
      const found = findProduct(item);
      if (found) return found;
    }
    return null;
  }

  const node = data as Record<string, unknown>;

  // JSON-LD allows `@type` to be a single string or an array of strings.
  const rawType = node['@type'];
  const types: unknown[] = Array.isArray(rawType) ? rawType : [rawType];
  const isProduct = types.some(
    (t) => typeof t === 'string' && (t === 'Product' || t.endsWith('/Product'))
  );
  if (isProduct) return node;

  const graph = node['@graph'];
  if (Array.isArray(graph)) {
    for (const item of graph) {
      const found = findProduct(item);
      if (found) return found;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Finds the product price using the `priceSelectors` heuristics.
 *
 * Selectors are tried in order. For the first selector with at least one
 * element whose text parses as a price, all of that selector's parsed prices
 * are passed to `findMostLikelyPrice`. If no selector yields a price,
 * `findMostLikelyPrice` is called with an empty list.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @returns Whatever `findMostLikelyPrice` selects from the candidates.
 */
function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null {
  for (const selector of priceSelectors) {
    const candidates: ParsedPrice[] = [];

    $(selector).each((_, el) => {
      const node = $(el);
      // Prefer machine-readable attributes over the visible text.
      const raw = node.attr('content') || node.attr('data-price') || node.text();
      const parsed = parsePrice(raw);
      if (parsed) {
        candidates.push(parsed);
      }
    });

    if (candidates.length > 0) {
      return findMostLikelyPrice(candidates);
    }
  }

  return findMostLikelyPrice([]);
}
|
||||
|
||||
/**
 * Finds the product name using the `nameSelectors` heuristics.
 *
 * For each selector, the first matching element's trimmed text is taken;
 * the first such text that is non-empty and shorter than 500 characters is
 * returned.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @returns The product name, or `null` if no selector yields usable text.
 */
function extractName($: cheerio.CheerioAPI): string | null {
  for (const selector of nameSelectors) {
    const match = $(selector).first();
    if (match.length === 0) continue;

    const candidate = match.text().trim();
    if (candidate.length > 0 && candidate.length < 500) {
      return candidate;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Finds the product image URL using the `imageSelectors` heuristics.
 *
 * For each selector, the first matching element is checked for a URL in
 * `src`, `content`, `data-zoom-image`, then `data-src`. The first URL found
 * is resolved against `baseUrl`; if it cannot be resolved, the raw attribute
 * value is returned.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @param baseUrl - Page URL used to resolve relative image paths.
 * @returns An image URL, or `null` if no selector yields one.
 */
function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null {
  // Attributes that may hold the image URL, in priority order.
  const urlAttributes = ['src', 'content', 'data-zoom-image', 'data-src'];

  for (const selector of imageSelectors) {
    const node = $(selector).first();
    if (node.length === 0) continue;

    let src: string | undefined;
    for (const attribute of urlAttributes) {
      const value = node.attr(attribute);
      if (value) {
        src = value;
        break;
      }
    }
    if (!src) continue;

    // Handle relative URLs
    try {
      return new URL(src, baseUrl).href;
    } catch {
      return src;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Scrapes a product page and returns only its price.
 *
 * @param url - Address of the product page to fetch.
 * @returns The price reported by `scrapeProduct`, or `null` if none was found.
 */
export async function scrapePrice(url: string): Promise<ParsedPrice | null> {
  const { price } = await scrapeProduct(url);
  return price;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue