Initial commit: PriceGhost price tracking application

Full-stack application for tracking product prices:
- Backend: Node.js + Express + TypeScript
- Frontend: React + Vite + TypeScript
- Database: PostgreSQL
- Price scraping with Cheerio
- JWT authentication
- Background price checking with node-cron
- Price history charts with Recharts
- Docker support with docker-compose

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-20 13:58:13 -05:00
commit 10660e5626
44 changed files with 3662 additions and 0 deletions

View file

@ -0,0 +1,76 @@
import cron from 'node-cron';
import { productQueries, priceHistoryQueries } from '../models';
import { scrapePrice } from './scraper';
// Re-entrancy guard: set while a price-check pass is in flight so that a cron
// tick arriving before the previous pass has finished is skipped.
let isRunning = false;

/**
 * Runs one price-check pass over every product that is due for a refresh.
 *
 * For each product the current price is scraped and, when it differs from the
 * most recently stored price (or no price has been stored yet), a new price
 * history entry is written. `last_checked` is updated whether or not a price
 * could be extracted. Products are processed sequentially with a two second
 * pause between them, and a failure on one product does not stop the pass.
 *
 * If a pass is already running the call returns immediately. Errors are
 * logged rather than thrown.
 */
async function checkPrices(): Promise<void> {
  if (isRunning) {
    console.log('Price check already in progress, skipping...');
    return;
  }

  isRunning = true;
  console.log('Starting scheduled price check...');

  try {
    // Find all products that are due for a refresh
    const products = await productQueries.findDueForRefresh();
    console.log(`Found ${products.length} products to check`);

    for (const product of products) {
      try {
        console.log(`Checking price for product ${product.id}: ${product.url}`);
        const priceData = await scrapePrice(product.url);

        if (priceData) {
          // Get the latest recorded price to compare
          const latestPrice = await priceHistoryQueries.getLatest(product.id);

          // Compare numerically. NOTE(review): the stored price may come back
          // from the database driver as a string (node-postgres does this for
          // NUMERIC columns by default); a strict string-vs-number comparison
          // would then always report a change and record a duplicate entry on
          // every check. Number() is a no-op when the value is already a number.
          const priceChanged =
            !latestPrice || Number(latestPrice.price) !== priceData.price;

          // Only record if price has changed or it's the first entry
          if (priceChanged) {
            await priceHistoryQueries.create(
              product.id,
              priceData.price,
              priceData.currency
            );
            console.log(
              `Recorded new price for product ${product.id}: ${priceData.currency} ${priceData.price}`
            );
          } else {
            console.log(`Price unchanged for product ${product.id}`);
          }
        } else {
          console.warn(`Could not extract price for product ${product.id}`);
        }

        // Update last_checked even if price extraction failed
        await productQueries.updateLastChecked(product.id);

        // Add a small delay between requests to avoid rate limiting
        await new Promise((resolve) => setTimeout(resolve, 2000));
      } catch (error) {
        console.error(`Error checking product ${product.id}:`, error);
        // Continue with next product even if one fails
      }
    }
  } catch (error) {
    console.error('Error in scheduled price check:', error);
  } finally {
    // Always release the guard so the next scheduled tick can run.
    isRunning = false;
    console.log('Scheduled price check complete');
  }
}
/**
 * Registers the recurring price-check job with node-cron.
 *
 * The job fires once per minute; any rejection from the check itself is
 * routed to `console.error` so it never surfaces as an unhandled rejection.
 */
export function startScheduler(): void {
  // Five-field cron expression matching every minute.
  const everyMinute = '* * * * *';

  const runCheck = (): void => {
    checkPrices().catch(console.error);
  };

  cron.schedule(everyMinute, runCheck);
  console.log('Price check scheduler started (runs every minute)');
}
// Exported in addition to startScheduler() so a single price-check pass can be
// triggered manually (e.g. for testing) without waiting for the cron schedule.
export { checkPrices };

View file

@ -0,0 +1,267 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import {
parsePrice,
ParsedPrice,
findMostLikelyPrice,
} from '../utils/priceParser';
/**
 * Result of scraping a single product page.
 *
 * Every extracted field is `null` when it could not be determined;
 * scrapeProduct() starts from an all-null result and fills in what it finds.
 */
export interface ScrapedProduct {
/** Product name, or `null` if none was found. */
name: string | null;
/** Parsed price (amount and currency), or `null` if none was found. */
price: ParsedPrice | null;
/** Product image URL, or `null` if none was found. */
imageUrl: string | null;
/** The URL that was requested, echoed back unchanged. */
url: string;
}
// CSS selectors tried, in order, when looking for a price in the page markup.
// Order matters: extractPrice() walks this list from top to bottom and stops at
// the first selector that yields at least one parseable price, so the specific
// selectors are listed before the broad substring matches at the end.
const priceSelectors = [
// Schema.org microdata and price-carrying data attributes
'[itemprop="price"]',
'[data-price]',
'[data-product-price]',
// Common class names and ids
'.price',
'.product-price',
'.current-price',
'.sale-price',
'.final-price',
'.offer-price',
'#price',
'#priceblock_ourprice',
'#priceblock_dealprice',
'#priceblock_saleprice',
// Amazon specific
'.a-price .a-offscreen',
'.a-price-whole',
'#corePrice_feature_div .a-price .a-offscreen',
'#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
// Generic patterns: any class or id containing "price" / "Price"
'[class*="price"]',
'[class*="Price"]',
'[id*="price"]',
'[id*="Price"]',
];
// CSS selectors for the product name, in priority order. extractName() uses
// the first matching element of the first selector whose trimmed text is
// non-empty and shorter than 500 characters; the bare 'h1' is the last resort.
const nameSelectors = [
'[itemprop="name"]',
'h1[class*="product"]',
'h1[class*="title"]',
'#productTitle',
'.product-title',
'.product-name',
'h1',
];
// CSS selectors for the product image, in priority order. extractImage() takes
// the first matching element of each selector and reads its URL from the
// src, content, data-zoom-image or data-src attribute (in that order).
const imageSelectors = [
'[itemprop="image"]',
'[property="og:image"]',
'#landingImage',
'#imgBlkFront',
'.product-image img',
'.main-image img',
'[data-zoom-image]',
'img[class*="product"]',
];
/**
 * Downloads a product page and extracts its name, price and image.
 *
 * Extraction is layered: JSON-LD structured data is consulted first, then the
 * CSS-selector based extractors fill any gaps, and finally Open Graph meta
 * tags are used for a still-missing name or image.
 *
 * @param url - Address of the product page to fetch.
 * @returns The scraped fields; any field that could not be found is `null`.
 *   Failures (network, parsing) are logged and whatever was collected up to
 *   that point is returned, so this function does not reject.
 */
export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
  const result: ScrapedProduct = {
    name: null,
    price: null,
    imageUrl: null,
    url,
  };

  // Browser-like request headers.
  const requestHeaders = {
    'User-Agent':
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    Accept:
      'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    Connection: 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
  };

  try {
    const { data: html } = await axios.get(url, {
      headers: requestHeaders,
      timeout: 15000,
      maxRedirects: 5,
    });
    const $ = cheerio.load(html);

    // 1. JSON-LD structured data takes precedence when present.
    const structured = extractJsonLd($);
    if (structured?.name) result.name = structured.name;
    if (structured?.price) result.price = structured.price;
    if (structured?.image) result.imageUrl = structured.image;

    // 2. Selector-based extraction for anything still missing.
    result.name = result.name || extractName($);
    result.price = result.price || extractPrice($);
    result.imageUrl = result.imageUrl || extractImage($, url);

    // 3. Open Graph meta tags as the final fallback.
    result.name =
      result.name || $('meta[property="og:title"]').attr('content') || null;
    result.imageUrl =
      result.imageUrl ||
      $('meta[property="og:image"]').attr('content') ||
      null;
  } catch (error) {
    console.error(`Error scraping ${url}:`, error);
  }

  return result;
}
/**
 * Extracts product details from the page's JSON-LD (`application/ld+json`)
 * script blocks.
 *
 * Each script block is parsed independently and searched with findProduct();
 * the first block containing a Product node wins. A block that is not valid
 * JSON, or whose data has an unexpected shape, is skipped so that later blocks
 * are still considered.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @returns The fields found on the first Product node (any of name, price,
 *   image may be absent), or `null` when no block contains a Product.
 */
function extractJsonLd(
  $: cheerio.CheerioAPI
): { name?: string; price?: ParsedPrice; image?: string } | null {
  const scripts = $('script[type="application/ld+json"]');

  for (let i = 0; i < scripts.length; i++) {
    const content = $(scripts[i]).html();
    if (!content) continue;

    try {
      const product = findProduct(JSON.parse(content));
      if (!product) continue;

      const result: { name?: string; price?: ParsedPrice; image?: string } =
        {};

      if (typeof product.name === 'string' && product.name) {
        result.name = product.name;
      }

      // `offers` may be a single offer or a list; use the first entry. The
      // object check also covers an empty list, where there is no first entry.
      const offer: unknown = Array.isArray(product.offers)
        ? product.offers[0]
        : product.offers;
      if (offer && typeof offer === 'object') {
        const { price, priceCurrency } = offer as Record<string, unknown>;
        if (price) {
          const amount = parseFloat(String(price));
          // Drop values that do not parse to a real number instead of
          // reporting NaN as a price.
          if (Number.isFinite(amount)) {
            result.price = {
              price: amount,
              currency:
                typeof priceCurrency === 'string' && priceCurrency
                  ? priceCurrency
                  : 'USD',
            };
          }
        }
      }

      // `image` may be a URL string, an object with a `url` field, or a list
      // of either; use the first entry and reduce it to a URL string.
      const image: unknown = Array.isArray(product.image)
        ? product.image[0]
        : product.image;
      if (typeof image === 'string') {
        if (image) result.image = image;
      } else if (image && typeof image === 'object') {
        const imageUrl = (image as Record<string, unknown>).url;
        if (typeof imageUrl === 'string' && imageUrl) {
          result.image = imageUrl;
        }
      }

      return result;
    } catch {
      // Malformed JSON (or unexpected data) in this block; try the next one.
    }
  }

  return null;
}
/** Returns true when a JSON-LD `@type` entry names the schema.org Product type. */
function isProductType(value: unknown): boolean {
  return (
    typeof value === 'string' &&
    /^(?:https?:\/\/schema\.org\/)?Product$/.test(value)
  );
}

/**
 * Searches parsed JSON-LD data for the first node typed as a Product.
 *
 * Handles a single node, a top-level array of nodes, and nodes nested under an
 * `@graph` array. A node's `@type` may be a single value or an array of
 * values (JSON-LD allows both), and may be the short name `Product` or the
 * full schema.org IRI.
 *
 * @param data - Value produced by parsing a JSON-LD script block.
 * @returns The matching node, or `null` when none is found.
 */
function findProduct(data: unknown): Record<string, unknown> | null {
  if (!data || typeof data !== 'object') return null;

  // A top-level array: search each entry in order.
  if (Array.isArray(data)) {
    for (const item of data) {
      const found = findProduct(item);
      if (found) return found;
    }
    return null;
  }

  const obj = data as Record<string, unknown>;

  // Normalise `@type` to a list so single and multi-typed nodes share a path.
  const declaredType = obj['@type'];
  const types: unknown[] = Array.isArray(declaredType)
    ? declaredType
    : [declaredType];
  if (types.some(isProductType)) {
    return obj;
  }

  const graph = obj['@graph'];
  if (Array.isArray(graph)) {
    for (const item of graph) {
      const found = findProduct(item);
      if (found) return found;
    }
  }

  return null;
}
/**
 * Finds the page's price using the CSS selectors in `priceSelectors`.
 *
 * Selectors are tried in list order. For every element a selector matches,
 * the price text is taken from the `content` attribute, else the `data-price`
 * attribute, else the element's text, and handed to parsePrice(). The search
 * stops at the first selector that produced at least one parsed price; the
 * collected candidates are then passed to findMostLikelyPrice().
 *
 * @param $ - Cheerio handle for the loaded document.
 * @returns Whatever findMostLikelyPrice() selects from the candidates.
 */
function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null {
  const candidates: ParsedPrice[] = [];

  for (const selector of priceSelectors) {
    const matches = $(selector);

    for (let i = 0; i < matches.length; i++) {
      const node = $(matches[i]);
      const rawText =
        node.attr('content') || node.attr('data-price') || node.text();

      const parsed = parsePrice(rawText);
      if (parsed) {
        candidates.push(parsed);
      }
    }

    // The first selector that yields any price wins; skip the broader ones.
    if (candidates.length > 0) break;
  }

  return findMostLikelyPrice(candidates);
}
/**
 * Finds the product name using the CSS selectors in `nameSelectors`.
 *
 * For each selector, in list order, the trimmed text of the first matching
 * element is examined; the first one that is non-empty and shorter than 500
 * characters is returned.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @returns The product name, or `null` when no selector yields usable text.
 */
function extractName($: cheerio.CheerioAPI): string | null {
  for (const selector of nameSelectors) {
    // An unmatched selector gives an empty selection whose text is '',
    // which the length check below rejects.
    const candidate = $(selector).first().text().trim();

    if (candidate.length > 0 && candidate.length < 500) {
      return candidate;
    }
  }

  return null;
}
/**
 * Finds the product image URL using the CSS selectors in `imageSelectors`.
 *
 * For each selector, in list order, the first matching element is inspected
 * and its URL read from the first non-empty attribute among `src`, `content`,
 * `data-zoom-image` and `data-src`. The value is resolved against `baseUrl`
 * so relative paths become absolute.
 *
 * @param $ - Cheerio handle for the loaded document.
 * @param baseUrl - URL of the page, used to resolve relative image paths.
 * @returns The absolute image URL, the raw attribute value if it cannot be
 *   resolved as a URL, or `null` when no selector yields an image.
 */
function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null {
  for (const selector of imageSelectors) {
    const match = $(selector).first();
    if (match.length === 0) continue;

    const rawSrc =
      match.attr('src') ||
      match.attr('content') ||
      match.attr('data-zoom-image') ||
      match.attr('data-src');
    if (!rawSrc) continue;

    // Handle relative URLs; fall back to the raw value if it cannot be parsed.
    try {
      return new URL(rawSrc, baseUrl).href;
    } catch {
      return rawSrc;
    }
  }

  return null;
}
/**
 * Scrapes a product page and returns only its price.
 *
 * @param url - Address of the product page to fetch.
 * @returns The price found by scrapeProduct(), or `null` if there was none.
 */
export async function scrapePrice(url: string): Promise<ParsedPrice | null> {
  const { price } = await scrapeProduct(url);
  return price;
}