Initial commit: PriceGhost price tracking application

Full-stack application for tracking product prices:
- Backend: Node.js + Express + TypeScript
- Frontend: React + Vite + TypeScript
- Database: PostgreSQL
- Price scraping with Cheerio
- JWT authentication
- Background price checking with node-cron
- Price history charts with Recharts
- Docker support with docker-compose

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
clucraft 2026-01-20 13:58:13 -05:00
commit 10660e5626
44 changed files with 3662 additions and 0 deletions

View file

@ -0,0 +1,19 @@
import { Pool } from 'pg';
import dotenv from 'dotenv';
dotenv.config();
/**
 * Shared PostgreSQL connection pool, configured from the DATABASE_URL
 * environment variable.
 */
const pool = new Pool({ connectionString: process.env.DATABASE_URL });

/** Logs each time the pool emits its 'connect' event. */
const logConnected = (): void => {
  console.log('Connected to PostgreSQL database');
};

/** Logs an error emitted by the pool, then terminates the process with exit code -1. */
const exitOnPoolError = (err: Error): void => {
  console.error('Unexpected error on idle client', err);
  process.exit(-1);
};

pool.on('connect', logConnected);
pool.on('error', exitOnPoolError);
export default pool;

View file

@ -0,0 +1,67 @@
import pool from './database';
/**
 * Creates the users, products and price_history tables (plus the price
 * history index) inside a single transaction, if they do not exist yet.
 *
 * On any failure the transaction is rolled back and the error is rethrown.
 * The client is always released and the pool is always closed afterwards,
 * because this module is a one-shot script.
 */
const initDatabase = async () => {
  const client = await pool.connect();

  try {
    await client.query('BEGIN');

    // Create users table
    await client.query(`
      CREATE TABLE IF NOT EXISTS users (
        id SERIAL PRIMARY KEY,
        email VARCHAR(255) UNIQUE NOT NULL,
        password_hash VARCHAR(255) NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      );
    `);
    console.log('Created users table');

    // Create products table
    await client.query(`
      CREATE TABLE IF NOT EXISTS products (
        id SERIAL PRIMARY KEY,
        user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
        url TEXT NOT NULL,
        name VARCHAR(255),
        image_url TEXT,
        refresh_interval INTEGER DEFAULT 3600,
        last_checked TIMESTAMP,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        UNIQUE(user_id, url)
      );
    `);
    console.log('Created products table');

    // Create price_history table
    await client.query(`
      CREATE TABLE IF NOT EXISTS price_history (
        id SERIAL PRIMARY KEY,
        product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
        price DECIMAL(10,2) NOT NULL,
        currency VARCHAR(10) DEFAULT 'USD',
        recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
      );
    `);
    console.log('Created price_history table');

    // Create index for price history queries
    await client.query(`
      CREATE INDEX IF NOT EXISTS idx_price_history_product_date
      ON price_history(product_id, recorded_at);
    `);
    console.log('Created price_history index');

    await client.query('COMMIT');
    console.log('Database initialization complete');
  } catch (error) {
    // A failing ROLLBACK (e.g. the connection dropped) must not replace the
    // error that caused the initialization to fail in the first place.
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      console.error('Error rolling back database initialization:', rollbackError);
    }
    console.error('Error initializing database:', error);
    throw error;
  } finally {
    client.release();
    await pool.end();
  }
};

initDatabase().catch((error) => {
  console.error(error);
  // Report the failure through the exit status so that callers chaining on
  // this script (shell `&&`, CI steps, container entrypoints) can detect it.
  process.exitCode = 1;
});

53
backend/src/index.ts Normal file
View file

@ -0,0 +1,53 @@
import express from 'express';
import cors from 'cors';
import dotenv from 'dotenv';
import authRoutes from './routes/auth';
import productRoutes from './routes/products';
import priceRoutes from './routes/prices';
import { startScheduler } from './services/scheduler';
// Application entry point: builds the Express app, registers middleware and
// routes in order, then starts listening.

// Load environment variables before anything below reads process.env.
dotenv.config();
const app = express();
// Listen on PORT when it is set, otherwise on 3001.
const PORT = process.env.PORT || 3001;
// Global middleware: CORS (called with no options) and JSON body parsing.
app.use(cors());
app.use(express.json());
// Health check endpoint: returns a static status plus the current time.
// It is registered directly on the app, not behind the auth middleware.
app.get('/health', (_, res) => {
res.json({ status: 'ok', timestamp: new Date().toISOString() });
});
// API routes. Both the product router and the price router are mounted on
// '/api/products'; productRoutes is registered first.
app.use('/api/auth', authRoutes);
app.use('/api/products', productRoutes);
app.use('/api/products', priceRoutes);
// Error handling middleware, registered after all routes. It logs the error
// and answers with a generic 500 so no error details are sent to the client.
// The request and next parameters are unused, hence the underscore prefixes.
app.use(
(
err: Error,
_req: express.Request,
res: express.Response,
_next: express.NextFunction
) => {
console.error('Unhandled error:', err);
res.status(500).json({ error: 'Internal server error' });
}
);
// Start server
app.listen(PORT, () => {
console.log(`PriceGhost API server running on port ${PORT}`);
// Start the background price checker, except when NODE_ENV is 'test'.
if (process.env.NODE_ENV !== 'test') {
startScheduler();
}
});
export default app;

View file

@ -0,0 +1,61 @@
import { Request, Response, NextFunction } from 'express';
import jwt from 'jsonwebtoken';
/** Express request extended with the authenticated user's id. */
export interface AuthRequest extends Request {
  /** Set by `authMiddleware` once the bearer token has been verified. */
  userId?: number;
}

/** Claims this application stores in its JWTs (see `generateToken`). */
interface JwtPayload {
  userId: number;
}

/**
 * Express middleware that authenticates a request from its
 * `Authorization: Bearer <token>` header.
 *
 * On success `req.userId` is set and `next()` is called. Otherwise the
 * request is answered here:
 * - 401 when the header is missing or malformed, or the token is expired,
 *   invalid, or carries no numeric `userId` claim;
 * - 500 when verification fails for another reason (e.g. JWT_SECRET unset).
 */
export const authMiddleware = (
  req: AuthRequest,
  res: Response,
  next: NextFunction
): void => {
  const authHeader = req.headers.authorization;

  if (!authHeader) {
    res.status(401).json({ error: 'No authorization header provided' });
    return;
  }

  const parts = authHeader.split(' ');

  // The authentication scheme name is case-insensitive, so accept "bearer" too.
  if (parts.length !== 2 || parts[0].toLowerCase() !== 'bearer') {
    res.status(401).json({ error: 'Invalid authorization header format' });
    return;
  }

  const token = parts[1];
  let userId: number;

  try {
    const secret = process.env.JWT_SECRET;
    if (!secret) {
      throw new Error('JWT_SECRET not configured');
    }

    // A correctly signed token may still lack a usable userId claim, so check
    // the payload shape instead of trusting a type assertion.
    const decoded: unknown = jwt.verify(token, secret);
    const claimedId =
      typeof decoded === 'object' && decoded !== null
        ? (decoded as Partial<JwtPayload>).userId
        : undefined;

    if (typeof claimedId !== 'number') {
      res.status(401).json({ error: 'Invalid token' });
      return;
    }
    userId = claimedId;
  } catch (error) {
    if (error instanceof jwt.TokenExpiredError) {
      res.status(401).json({ error: 'Token expired' });
      return;
    }
    if (error instanceof jwt.JsonWebTokenError) {
      res.status(401).json({ error: 'Invalid token' });
      return;
    }
    res.status(500).json({ error: 'Authentication failed' });
    return;
  }

  // Call next() outside the try block so an error thrown synchronously by a
  // downstream handler is not reported as an authentication failure.
  req.userId = userId;
  next();
};
/**
 * Signs a JWT carrying `{ userId }` with the JWT_SECRET environment variable.
 * The token expires after seven days.
 *
 * @throws Error when JWT_SECRET is not set.
 */
export const generateToken = (userId: number): string => {
  const { JWT_SECRET: secret } = process.env;

  if (secret) {
    return jwt.sign({ userId }, secret, { expiresIn: '7d' });
  }

  throw new Error('JWT_SECRET not configured');
};

233
backend/src/models/index.ts Normal file
View file

@ -0,0 +1,233 @@
import pool from '../config/database';
// User types and queries
/** A row of the `users` table. */
export interface User {
  id: number;
  email: string;
  password_hash: string;
  created_at: Date;
}

/** Runs a query expected to match at most one user; resolves to that row or null. */
const firstUserOrNull = async (
  sql: string,
  params: (string | number)[]
): Promise<User | null> => {
  const { rows } = await pool.query(sql, params);
  return rows[0] || null;
};

/** Data-access helpers for the `users` table. */
export const userQueries = {
  /** Finds the user with exactly this email, or null. */
  findByEmail: (email: string): Promise<User | null> =>
    firstUserOrNull('SELECT * FROM users WHERE email = $1', [email]),

  /** Finds the user with this id, or null. */
  findById: (id: number): Promise<User | null> =>
    firstUserOrNull('SELECT * FROM users WHERE id = $1', [id]),

  /** Inserts a user and resolves to the inserted row. */
  create: async (email: string, passwordHash: string): Promise<User> => {
    const { rows } = await pool.query(
      'INSERT INTO users (email, password_hash) VALUES ($1, $2) RETURNING *',
      [email, passwordHash]
    );
    return rows[0];
  },
};
// Product types and queries
/** A row of the `products` table. */
export interface Product {
id: number;
user_id: number;
url: string;
name: string | null;
image_url: string | null;
// Compared against last_checked as a number of seconds in findDueForRefresh.
refresh_interval: number;
last_checked: Date | null;
created_at: Date;
}
/**
 * A product joined with its most recent price_history row.
 * Both extra fields are null when the product has no recorded price yet.
 */
export interface ProductWithLatestPrice extends Product {
// NOTE(review): price is a DECIMAL column; node-postgres usually returns
// such values as strings unless a type parser is registered — confirm
// before relying on this being a number at runtime.
current_price: number | null;
currency: string | null;
}
/** Data-access helpers for the `products` table. */
export const productQueries = {
// Lists every product owned by `userId`, newest first. The LATERAL subquery
// picks the single most recent price_history row per product; LEFT JOIN keeps
// products that have no price yet.
findByUserId: async (userId: number): Promise<ProductWithLatestPrice[]> => {
const result = await pool.query(
`SELECT p.*, ph.price as current_price, ph.currency
FROM products p
LEFT JOIN LATERAL (
SELECT price, currency FROM price_history
WHERE product_id = p.id
ORDER BY recorded_at DESC
LIMIT 1
) ph ON true
WHERE p.user_id = $1
ORDER BY p.created_at DESC`,
[userId]
);
return result.rows;
},
// Loads one product with its latest price. The row must match both `id` and
// `userId`, so a product owned by another user yields null.
findById: async (id: number, userId: number): Promise<ProductWithLatestPrice | null> => {
const result = await pool.query(
`SELECT p.*, ph.price as current_price, ph.currency
FROM products p
LEFT JOIN LATERAL (
SELECT price, currency FROM price_history
WHERE product_id = p.id
ORDER BY recorded_at DESC
LIMIT 1
) ph ON true
WHERE p.id = $1 AND p.user_id = $2`,
[id, userId]
);
return result.rows[0] || null;
},
// Inserts a product for `userId` and returns the new row. The table has a
// UNIQUE(user_id, url) constraint, so inserting the same URL twice for one
// user makes the query reject.
create: async (
userId: number,
url: string,
name: string | null,
imageUrl: string | null,
refreshInterval: number = 3600
): Promise<Product> => {
const result = await pool.query(
`INSERT INTO products (user_id, url, name, image_url, refresh_interval)
VALUES ($1, $2, $3, $4, $5)
RETURNING *`,
[userId, url, name, imageUrl, refreshInterval]
);
return result.rows[0];
},
// Updates the provided fields of a product owned by `userId`.
// Returns null in two different situations: no updatable field was supplied,
// or no row matched (id, userId). Callers cannot tell these apart.
update: async (
id: number,
userId: number,
updates: { name?: string; refresh_interval?: number }
): Promise<Product | null> => {
// SET fragments and their values are built in lockstep; paramIndex is the
// number of the next $n placeholder.
const fields: string[] = [];
const values: (string | number)[] = [];
let paramIndex = 1;
if (updates.name !== undefined) {
fields.push(`name = $${paramIndex++}`);
values.push(updates.name);
}
if (updates.refresh_interval !== undefined) {
fields.push(`refresh_interval = $${paramIndex++}`);
values.push(updates.refresh_interval);
}
if (fields.length === 0) return null;
// id and userId take the two placeholders after the SET values.
values.push(id, userId);
const result = await pool.query(
`UPDATE products SET ${fields.join(', ')}
WHERE id = $${paramIndex++} AND user_id = $${paramIndex}
RETURNING *`,
values
);
return result.rows[0] || null;
},
// Deletes a product owned by `userId`; resolves to true when a row was removed.
delete: async (id: number, userId: number): Promise<boolean> => {
const result = await pool.query(
'DELETE FROM products WHERE id = $1 AND user_id = $2',
[id, userId]
);
// rowCount may be null, which is treated as "nothing deleted".
return (result.rowCount ?? 0) > 0;
},
// Stamps the product as checked now. Not scoped to a user.
updateLastChecked: async (id: number): Promise<void> => {
await pool.query(
'UPDATE products SET last_checked = CURRENT_TIMESTAMP WHERE id = $1',
[id]
);
},
// Returns products (across all users) that were never checked, or whose
// last_checked plus refresh_interval seconds lies in the past.
findDueForRefresh: async (): Promise<Product[]> => {
const result = await pool.query(
`SELECT * FROM products
WHERE last_checked IS NULL
OR last_checked + (refresh_interval || ' seconds')::interval < CURRENT_TIMESTAMP`
);
return result.rows;
},
};
// Price History types and queries
/** A row of the `price_history` table. */
export interface PriceHistory {
id: number;
product_id: number;
// NOTE(review): price is a DECIMAL(10,2) column; node-postgres usually returns
// such values as strings unless a type parser is registered — confirm before
// comparing this field with `===` against a number.
price: number;
currency: string;
recorded_at: Date;
}
/** Data-access helpers for the `price_history` table. */
export const priceHistoryQueries = {
// Returns a product's price history, oldest first. When `days` is truthy,
// only rows recorded within the last `days` days are returned; 0, NaN and
// undefined all mean "no time filter".
findByProductId: async (
productId: number,
days?: number
): Promise<PriceHistory[]> => {
let query = `
SELECT * FROM price_history
WHERE product_id = $1
`;
const values: (number | string)[] = [productId];
if (days) {
// The day count is sent as a bound parameter and concatenated with ' days'
// inside SQL, so it is never interpolated into the query text.
query += ` AND recorded_at >= CURRENT_TIMESTAMP - ($2 || ' days')::interval`;
values.push(days.toString());
}
query += ' ORDER BY recorded_at ASC';
const result = await pool.query(query, values);
return result.rows;
},
// Inserts one price observation and returns the new row. recorded_at is not
// supplied here; the column default (CURRENT_TIMESTAMP) applies.
create: async (
productId: number,
price: number,
currency: string = 'USD'
): Promise<PriceHistory> => {
const result = await pool.query(
`INSERT INTO price_history (product_id, price, currency)
VALUES ($1, $2, $3)
RETURNING *`,
[productId, price, currency]
);
return result.rows[0];
},
// Returns the most recently recorded price for a product, or null if none.
getLatest: async (productId: number): Promise<PriceHistory | null> => {
const result = await pool.query(
`SELECT * FROM price_history
WHERE product_id = $1
ORDER BY recorded_at DESC
LIMIT 1`,
[productId]
);
return result.rows[0] || null;
},
// Returns min/max/average price and the number of observations for a product.
// NOTE(review): the aggregate query yields a row even when the product has no
// history (MIN/MAX/AVG would then be NULL), and COUNT/DECIMAL values may
// arrive as strings from node-postgres — verify against the declared types.
getStats: async (productId: number): Promise<{
min_price: number;
max_price: number;
avg_price: number;
price_count: number;
} | null> => {
const result = await pool.query(
`SELECT
MIN(price) as min_price,
MAX(price) as max_price,
AVG(price)::decimal(10,2) as avg_price,
COUNT(*) as price_count
FROM price_history
WHERE product_id = $1`,
[productId]
);
return result.rows[0] || null;
},
};

View file

@ -0,0 +1,93 @@
import { Router, Request, Response } from 'express';
import bcrypt from 'bcrypt';
import { userQueries } from '../models';
import { generateToken } from '../middleware/auth';
const router = Router();
// Register new user.
// Responds 201 with a token and the public user fields, 400 on invalid input,
// 409 when the email is already registered, and 500 on unexpected failures.
router.post('/register', async (req: Request, res: Response) => {
  try {
    const { email, password } = req.body ?? {};

    if (!email || !password) {
      res.status(400).json({ error: 'Email and password are required' });
      return;
    }

    // Non-string values (numbers, arrays, objects) would slip past the length
    // and regex checks below and only fail later inside bcrypt as a 500.
    if (typeof email !== 'string' || typeof password !== 'string') {
      res.status(400).json({ error: 'Email and password must be strings' });
      return;
    }

    if (password.length < 8) {
      res.status(400).json({ error: 'Password must be at least 8 characters' });
      return;
    }

    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    if (!emailRegex.test(email)) {
      res.status(400).json({ error: 'Invalid email format' });
      return;
    }

    const existingUser = await userQueries.findByEmail(email);
    if (existingUser) {
      res.status(409).json({ error: 'Email already registered' });
      return;
    }

    const saltRounds = 12;
    const passwordHash = await bcrypt.hash(password, saltRounds);

    const user = await userQueries.create(email, passwordHash);
    const token = generateToken(user.id);

    res.status(201).json({
      message: 'User registered successfully',
      token,
      user: {
        id: user.id,
        email: user.email,
      },
    });
  } catch (error) {
    // Two concurrent registrations can both pass the findByEmail check; the
    // second insert then violates the UNIQUE constraint on users.email
    // (PostgreSQL error code 23505). Report that as a conflict as well.
    const code =
      typeof error === 'object' && error !== null
        ? (error as { code?: unknown }).code
        : undefined;
    if (code === '23505') {
      res.status(409).json({ error: 'Email already registered' });
      return;
    }

    console.error('Registration error:', error);
    res.status(500).json({ error: 'Registration failed' });
  }
});
// Login.
// Responds 200 with a token and the public user fields, 400 when credentials
// are missing or not strings, 401 when they do not match, 500 otherwise.
router.post('/login', async (req: Request, res: Response) => {
  try {
    const { email, password } = req.body ?? {};

    if (!email || !password) {
      res.status(400).json({ error: 'Email and password are required' });
      return;
    }

    // Reject non-string credentials up front instead of letting the password
    // comparison throw and surface as a 500.
    if (typeof email !== 'string' || typeof password !== 'string') {
      res.status(400).json({ error: 'Email and password must be strings' });
      return;
    }

    const user = await userQueries.findByEmail(email);
    if (!user) {
      // Same message as a wrong password, so the response body does not
      // reveal whether the email is registered.
      res.status(401).json({ error: 'Invalid email or password' });
      return;
    }

    const isValidPassword = await bcrypt.compare(password, user.password_hash);
    if (!isValidPassword) {
      res.status(401).json({ error: 'Invalid email or password' });
      return;
    }

    const token = generateToken(user.id);

    res.json({
      message: 'Login successful',
      token,
      user: {
        id: user.id,
        email: user.email,
      },
    });
  } catch (error) {
    console.error('Login error:', error);
    res.status(500).json({ error: 'Login failed' });
  }
});
export default router;

View file

@ -0,0 +1,93 @@
import { Router, Response } from 'express';
import { AuthRequest, authMiddleware } from '../middleware/auth';
import { productQueries, priceHistoryQueries } from '../models';
import { scrapePrice } from '../services/scraper';
const router = Router();
// All routes require authentication
router.use(authMiddleware);
// GET /:productId/prices — price history of one of the caller's products.
// An optional `days` query parameter limits the history to recent entries.
router.get('/:productId/prices', async (req: AuthRequest, res: Response) => {
  try {
    const ownerId = req.userId!;
    const id = parseInt(req.params.productId, 10);

    if (Number.isNaN(id)) {
      res.status(400).json({ error: 'Invalid product ID' });
      return;
    }

    // Ownership check: the lookup is scoped to the authenticated user.
    const product = await productQueries.findById(id, ownerId);
    if (!product) {
      res.status(404).json({ error: 'Product not found' });
      return;
    }

    // `days` stays undefined when the query parameter is absent or empty.
    const rawDays = req.query.days;
    let days: number | undefined;
    if (rawDays) {
      days = parseInt(rawDays as string, 10);
    }

    const prices = await priceHistoryQueries.findByProductId(id, days);

    res.json({ product, prices });
  } catch (error) {
    console.error('Error fetching price history:', error);
    res.status(500).json({ error: 'Failed to fetch price history' });
  }
});
// POST /:productId/refresh — scrape the product page now and record the price.
router.post('/:productId/refresh', async (req: AuthRequest, res: Response) => {
  try {
    const ownerId = req.userId!;
    const id = parseInt(req.params.productId, 10);

    if (Number.isNaN(id)) {
      res.status(400).json({ error: 'Invalid product ID' });
      return;
    }

    // Ownership check: the lookup is scoped to the authenticated user.
    const product = await productQueries.findById(id, ownerId);
    if (!product) {
      res.status(404).json({ error: 'Product not found' });
      return;
    }

    const scraped = await scrapePrice(product.url);
    if (!scraped) {
      res.status(400).json({ error: 'Could not extract price from URL' });
      return;
    }

    // Store the observation first, then stamp the product as checked.
    const { price: amount, currency } = scraped;
    const price = await priceHistoryQueries.create(id, amount, currency);
    await productQueries.updateLastChecked(id);

    res.json({ message: 'Price refreshed successfully', price });
  } catch (error) {
    console.error('Error refreshing price:', error);
    res.status(500).json({ error: 'Failed to refresh price' });
  }
});
export default router;

View file

@ -0,0 +1,172 @@
import { Router, Response } from 'express';
import { AuthRequest, authMiddleware } from '../middleware/auth';
import { productQueries, priceHistoryQueries } from '../models';
import { scrapeProduct } from '../services/scraper';
const router = Router();
// All routes require authentication
router.use(authMiddleware);
// GET / — every product tracked by the authenticated user.
router.get('/', async (req: AuthRequest, res: Response) => {
  try {
    res.json(await productQueries.findByUserId(req.userId!));
  } catch (error) {
    console.error('Error fetching products:', error);
    res.status(500).json({ error: 'Failed to fetch products' });
  }
});
// Add a new product to track.
// Validates the input, scrapes the page once to obtain name, image and the
// initial price, then stores the product together with that first price.
// Responds 201 with the stored product, 400 on invalid input or when no price
// can be extracted, 409 when the user already tracks the URL, 500 otherwise.
router.post('/', async (req: AuthRequest, res: Response) => {
  try {
    const userId = req.userId!;
    const { url, refresh_interval } = req.body ?? {};

    if (!url) {
      res.status(400).json({ error: 'URL is required' });
      return;
    }

    // Validate URL
    let parsedUrl: URL;
    try {
      if (typeof url !== 'string') {
        throw new TypeError('URL must be a string');
      }
      parsedUrl = new URL(url);
    } catch {
      res.status(400).json({ error: 'Invalid URL format' });
      return;
    }

    // The server fetches this URL itself, so only web schemes are accepted;
    // `new URL` alone also accepts e.g. file:, ftp: or javascript: URLs.
    if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
      res.status(400).json({ error: 'Only http and https URLs are supported' });
      return;
    }

    // A missing or falsy interval falls back to one hour; anything else must
    // be a positive whole number of seconds (numeric strings are accepted).
    let refreshInterval = 3600;
    if (refresh_interval) {
      const requested =
        typeof refresh_interval === 'string'
          ? Number(refresh_interval)
          : refresh_interval;
      if (
        typeof requested !== 'number' ||
        !Number.isInteger(requested) ||
        requested <= 0
      ) {
        res.status(400).json({
          error: 'refresh_interval must be a positive integer number of seconds',
        });
        return;
      }
      refreshInterval = requested;
    }

    // Scrape product info
    const scrapedData = await scrapeProduct(url);

    if (!scrapedData.price) {
      res.status(400).json({
        error: 'Could not extract price from the provided URL',
      });
      return;
    }

    // Create product
    const product = await productQueries.create(
      userId,
      url,
      scrapedData.name,
      scrapedData.imageUrl,
      refreshInterval
    );

    // Record initial price
    await priceHistoryQueries.create(
      product.id,
      scrapedData.price.price,
      scrapedData.price.currency
    );

    // Update last_checked timestamp
    await productQueries.updateLastChecked(product.id);

    // Fetch the product with the price
    const productWithPrice = await productQueries.findById(product.id, userId);

    res.status(201).json(productWithPrice);
  } catch (error) {
    // Handle unique constraint violation on (user_id, url). Prefer the
    // PostgreSQL error code (23505) and keep the message check as a fallback.
    const code =
      typeof error === 'object' && error !== null
        ? (error as { code?: unknown }).code
        : undefined;
    if (
      code === '23505' ||
      (error instanceof Error && error.message.includes('duplicate key value'))
    ) {
      res.status(409).json({ error: 'You are already tracking this product' });
      return;
    }
    console.error('Error adding product:', error);
    res.status(500).json({ error: 'Failed to add product' });
  }
});
// GET /:id — one of the caller's products, extended with its price statistics.
router.get('/:id', async (req: AuthRequest, res: Response) => {
  try {
    const ownerId = req.userId!;
    const id = parseInt(req.params.id, 10);

    if (Number.isNaN(id)) {
      res.status(400).json({ error: 'Invalid product ID' });
      return;
    }

    const found = await productQueries.findById(id, ownerId);
    if (!found) {
      res.status(404).json({ error: 'Product not found' });
      return;
    }

    // Statistics are only loaded once ownership has been confirmed.
    const stats = await priceHistoryQueries.getStats(id);
    const payload = { ...found, stats };

    res.json(payload);
  } catch (error) {
    console.error('Error fetching product:', error);
    res.status(500).json({ error: 'Failed to fetch product' });
  }
});
// Update product settings (name and/or refresh_interval).
// Responds 200 with the updated product, 400 on an invalid id or body, 404
// when the product does not exist for this user, 500 otherwise.
router.put('/:id', async (req: AuthRequest, res: Response) => {
  try {
    const userId = req.userId!;
    const productId = parseInt(req.params.id, 10);

    if (isNaN(productId)) {
      res.status(400).json({ error: 'Invalid product ID' });
      return;
    }

    const { name, refresh_interval } = req.body ?? {};

    // productQueries.update returns null both for "nothing to update" and for
    // "no such product". Reject an empty update here so the 404 below always
    // means the product was not found.
    if (name === undefined && refresh_interval === undefined) {
      res.status(400).json({ error: 'No fields to update' });
      return;
    }

    if (name !== undefined && typeof name !== 'string') {
      res.status(400).json({ error: 'name must be a string' });
      return;
    }

    // The interval must be a positive whole number of seconds; numeric
    // strings are accepted and converted.
    let refreshInterval: number | undefined;
    if (refresh_interval !== undefined) {
      const requested =
        typeof refresh_interval === 'string' && refresh_interval.trim() !== ''
          ? Number(refresh_interval)
          : refresh_interval;
      if (
        typeof requested !== 'number' ||
        !Number.isInteger(requested) ||
        requested <= 0
      ) {
        res.status(400).json({
          error: 'refresh_interval must be a positive integer number of seconds',
        });
        return;
      }
      refreshInterval = requested;
    }

    const product = await productQueries.update(productId, userId, {
      name,
      refresh_interval: refreshInterval,
    });

    if (!product) {
      res.status(404).json({ error: 'Product not found' });
      return;
    }

    res.json(product);
  } catch (error) {
    console.error('Error updating product:', error);
    res.status(500).json({ error: 'Failed to update product' });
  }
});
// DELETE /:id — stop tracking one of the caller's products.
router.delete('/:id', async (req: AuthRequest, res: Response) => {
  try {
    const ownerId = req.userId!;
    const id = parseInt(req.params.id, 10);

    if (Number.isNaN(id)) {
      res.status(400).json({ error: 'Invalid product ID' });
      return;
    }

    // The delete is scoped to the owner; false means no matching row existed.
    const removed = await productQueries.delete(id, ownerId);
    if (removed) {
      res.json({ message: 'Product deleted successfully' });
      return;
    }

    res.status(404).json({ error: 'Product not found' });
  } catch (error) {
    console.error('Error deleting product:', error);
    res.status(500).json({ error: 'Failed to delete product' });
  }
});
export default router;

View file

@ -0,0 +1,76 @@
import cron from 'node-cron';
import { productQueries, priceHistoryQueries } from '../models';
import { scrapePrice } from './scraper';
// Guards against overlapping runs: a run that outlasts the cron interval makes
// the next tick skip instead of starting a second pass in parallel.
let isRunning = false;

/**
 * Scrapes every product that is due for a refresh and records a new
 * price_history row when the price differs from the last recorded one.
 *
 * Products are processed one at a time with a two second pause after each
 * successful check. A failure on one product is logged and does not stop the
 * run. This function never rejects because of a scraping or database error.
 */
async function checkPrices(): Promise<void> {
  if (isRunning) {
    console.log('Price check already in progress, skipping...');
    return;
  }

  isRunning = true;
  console.log('Starting scheduled price check...');

  try {
    // Find all products that are due for a refresh
    const products = await productQueries.findDueForRefresh();
    console.log(`Found ${products.length} products to check`);

    for (const product of products) {
      try {
        console.log(`Checking price for product ${product.id}: ${product.url}`);

        const priceData = await scrapePrice(product.url);

        if (priceData) {
          // Get the latest recorded price to compare
          const latestPrice = await priceHistoryQueries.getLatest(product.id);

          // The price column is DECIMAL(10,2), which the database driver may
          // hand back as a string, and the scraped value is a JS number that
          // is not necessarily rounded. Compare both as whole cents so that
          // "29.99" and 29.99 count as the same price.
          const previousCents =
            latestPrice === null
              ? null
              : Math.round(Number(latestPrice.price) * 100);
          const currentCents = Math.round(priceData.price * 100);

          // Only record if price has changed or it's the first entry
          if (previousCents === null || previousCents !== currentCents) {
            await priceHistoryQueries.create(
              product.id,
              priceData.price,
              priceData.currency
            );
            console.log(
              `Recorded new price for product ${product.id}: ${priceData.currency} ${priceData.price}`
            );
          } else {
            console.log(`Price unchanged for product ${product.id}`);
          }
        } else {
          console.warn(`Could not extract price for product ${product.id}`);
        }

        // Update last_checked even if price extraction failed
        await productQueries.updateLastChecked(product.id);

        // Add a small delay between requests to avoid rate limiting
        await new Promise((resolve) => setTimeout(resolve, 2000));
      } catch (error) {
        console.error(`Error checking product ${product.id}:`, error);
        // Continue with next product even if one fails
      }
    }
  } catch (error) {
    console.error('Error in scheduled price check:', error);
  } finally {
    isRunning = false;
    console.log('Scheduled price check complete');
  }
}
/**
 * Registers a cron job that triggers `checkPrices` once per minute.
 * A rejection from a run is logged with `console.error`.
 */
export function startScheduler(): void {
  const everyMinute = '* * * * *';
  const runCheck = (): void => {
    checkPrices().catch(console.error);
  };

  cron.schedule(everyMinute, runCheck);

  console.log('Price check scheduler started (runs every minute)');
}
// Allow manual trigger for testing
export { checkPrices };

View file

@ -0,0 +1,267 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import {
parsePrice,
ParsedPrice,
findMostLikelyPrice,
} from '../utils/priceParser';
/**
 * Result of scraping a product page. Every extracted field is null when it
 * could not be determined; `url` is the URL that was requested.
 */
export interface ScrapedProduct {
name: string | null;
price: ParsedPrice | null;
imageUrl: string | null;
url: string;
}
// Common price selectors used across e-commerce sites.
// Order matters: extractPrice tries them top to bottom and stops at the first
// selector that yields at least one parsable price, so specific selectors come
// first and the broad substring matches last.
const priceSelectors = [
// Schema.org microdata and data attributes
'[itemprop="price"]',
'[data-price]',
'[data-product-price]',
// Common class names
'.price',
'.product-price',
'.current-price',
'.sale-price',
'.final-price',
'.offer-price',
'#price',
'#priceblock_ourprice',
'#priceblock_dealprice',
'#priceblock_saleprice',
// Amazon specific
'.a-price .a-offscreen',
'.a-price-whole',
'#corePrice_feature_div .a-price .a-offscreen',
'#corePriceDisplay_desktop_feature_div .a-price .a-offscreen',
// Generic patterns: any class or id containing "price" / "Price"
'[class*="price"]',
'[class*="Price"]',
'[id*="price"]',
'[id*="Price"]',
];
// Selectors for product name, tried in order by extractName; the bare 'h1' is
// the last resort.
const nameSelectors = [
'[itemprop="name"]',
'h1[class*="product"]',
'h1[class*="title"]',
'#productTitle',
'.product-title',
'.product-name',
'h1',
];
// Selectors for product image, tried in order by extractImage. The matched
// element may carry the URL in src, content, data-zoom-image or data-src.
const imageSelectors = [
'[itemprop="image"]',
'[property="og:image"]',
'#landingImage',
'#imgBlkFront',
'.product-image img',
'.main-image img',
'[data-zoom-image]',
'img[class*="product"]',
];
/**
 * Downloads a product page and extracts its name, price and image.
 *
 * JSON-LD structured data is used first; anything it does not provide is
 * filled from CSS selectors, and name/image finally fall back to Open Graph
 * meta tags. Any error (network, parsing) is logged and the fields found so
 * far are returned, so this function does not reject.
 *
 * @param url Page to fetch.
 * @returns The scraped fields; each one is null when it could not be found.
 */
export async function scrapeProduct(url: string): Promise<ScrapedProduct> {
  const scraped: ScrapedProduct = { name: null, price: null, imageUrl: null, url };

  try {
    // Browser-like request headers.
    const requestHeaders = {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      Accept:
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
      'Accept-Encoding': 'gzip, deflate, br',
      Connection: 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
    };

    const { data: html } = await axios.get(url, {
      headers: requestHeaders,
      timeout: 15000,
      maxRedirects: 5,
    });

    const $ = cheerio.load(html);

    // Structured data wins whenever it provides a value.
    const structured = extractJsonLd($);
    if (structured) {
      if (structured.name) scraped.name = structured.name;
      if (structured.price) scraped.price = structured.price;
      if (structured.image) scraped.imageUrl = structured.image;
    }

    // Selector-based extraction fills whatever is still missing.
    scraped.name = scraped.name || extractName($);
    scraped.price = scraped.price || extractPrice($);
    scraped.imageUrl = scraped.imageUrl || extractImage($, url);

    // Open Graph meta tags as the last fallback for name and image.
    scraped.name =
      scraped.name || $('meta[property="og:title"]').attr('content') || null;
    scraped.imageUrl =
      scraped.imageUrl || $('meta[property="og:image"]').attr('content') || null;
  } catch (error) {
    console.error(`Error scraping ${url}:`, error);
  }

  return scraped;
}
/**
 * Extracts product data from the page's JSON-LD (`application/ld+json`)
 * script tags.
 *
 * Scripts are examined in document order. A script that is empty, is not
 * valid JSON, or contains no Product node is skipped. The first Product found
 * determines the result, even if it lacks some of the fields.
 *
 * @returns The fields that could be read from the first Product node, or null
 *          when no script contains one.
 */
function extractJsonLd(
  $: cheerio.CheerioAPI
): { name?: string; price?: ParsedPrice; image?: string } | null {
  const scripts = $('script[type="application/ld+json"]');

  for (let i = 0; i < scripts.length; i++) {
    const content = $(scripts[i]).html();
    if (!content) continue;

    // Parse each script on its own so that one malformed block does not stop
    // the remaining blocks from being examined.
    let data: unknown;
    try {
      data = JSON.parse(content);
    } catch {
      continue;
    }

    const product = findProduct(data);
    if (!product) continue;

    const result: { name?: string; price?: ParsedPrice; image?: string } = {};

    if (typeof product.name === 'string' && product.name) {
      result.name = product.name;
    }

    // `offers` may be a single offer or a (possibly empty) list of offers.
    const offers = product.offers;
    const offer: unknown = Array.isArray(offers) ? offers[0] : offers;
    if (offer && typeof offer === 'object') {
      const { price, priceCurrency } = offer as Record<string, unknown>;
      const amount =
        typeof price === 'number'
          ? price
          : typeof price === 'string'
            ? parseFloat(price)
            : NaN;
      // Ignore prices that are not positive finite numbers (e.g. "N/A") so
      // the selector-based fallback can still run.
      if (Number.isFinite(amount) && amount > 0) {
        result.price = {
          price: amount,
          currency:
            typeof priceCurrency === 'string' && priceCurrency
              ? priceCurrency
              : 'USD',
        };
      }
    }

    // `image` may be a URL string, an object with a `url`, or a list of either.
    const image: unknown = Array.isArray(product.image)
      ? product.image[0]
      : product.image;
    if (typeof image === 'string') {
      if (image) result.image = image;
    } else if (image && typeof image === 'object') {
      const imageUrl = (image as Record<string, unknown>).url;
      if (typeof imageUrl === 'string' && imageUrl) {
        result.image = imageUrl;
      }
    }

    return result;
  }

  return null;
}
/**
 * Searches parsed JSON-LD for the first node whose `@type` is `Product`.
 *
 * The input may be a single node, an array of nodes, or a node carrying an
 * `@graph` array; arrays and graphs are searched recursively in order.
 * `@type` may be a string or an array of strings.
 *
 * @param data Parsed JSON of unknown shape.
 * @returns The matching node, or null when there is none.
 */
function findProduct(data: unknown): Record<string, unknown> | null {
  if (!data || typeof data !== 'object') return null;

  if (Array.isArray(data)) {
    for (const item of data) {
      const found = findProduct(item);
      if (found) return found;
    }
    return null;
  }

  const obj = data as Record<string, unknown>;

  // JSON-LD allows a node to declare several types at once, e.g.
  // "@type": ["Product", "Thing"].
  const declaredType = obj['@type'];
  const types: unknown[] = Array.isArray(declaredType)
    ? declaredType
    : [declaredType];
  if (types.includes('Product')) {
    return obj;
  }

  const graph = obj['@graph'];
  if (Array.isArray(graph)) {
    for (const item of graph) {
      const found = findProduct(item);
      if (found) return found;
    }
  }

  return null;
}
/**
 * Finds the product price using the selector list.
 *
 * Selectors are tried in order; the first one that produces at least one
 * parsable price supplies all candidates, and the most likely candidate is
 * returned. For each element the `content` attribute is preferred, then
 * `data-price`, then the element's text.
 *
 * @returns The chosen price, or null when no selector yields a price.
 */
function extractPrice($: cheerio.CheerioAPI): ParsedPrice | null {
  let candidates: ParsedPrice[] = [];

  for (const selector of priceSelectors) {
    candidates = [];

    for (const el of $(selector).toArray()) {
      const node = $(el);
      const raw = node.attr('content') || node.attr('data-price') || node.text();
      const parsed = parsePrice(raw);
      if (parsed) {
        candidates.push(parsed);
      }
    }

    if (candidates.length > 0) break;
  }

  return findMostLikelyPrice(candidates);
}
/**
 * Finds the product name using the name selector list.
 *
 * For each selector only the first matching element is considered; its
 * trimmed text is accepted when it is non-empty and shorter than 500
 * characters.
 *
 * @returns The first acceptable name, or null.
 */
function extractName($: cheerio.CheerioAPI): string | null {
  for (const selector of nameSelectors) {
    // An unmatched selector yields empty text, which fails the length check.
    const candidate = $(selector).first().text().trim();
    const isPlausible = candidate.length > 0 && candidate.length < 500;

    if (isPlausible) {
      return candidate;
    }
  }

  return null;
}
/**
 * Finds the product image URL using the image selector list.
 *
 * For each selector the first matching element is inspected and the first
 * non-empty value among `src`, `content`, `data-zoom-image` and `data-src` is
 * used. The value is resolved against `baseUrl`; if it cannot be resolved it
 * is returned unchanged.
 *
 * @returns An image URL, or null when no selector provides one.
 */
function extractImage($: cheerio.CheerioAPI, baseUrl: string): string | null {
  const sourceAttributes = ['src', 'content', 'data-zoom-image', 'data-src'];

  for (const selector of imageSelectors) {
    const element = $(selector).first();
    if (!element.length) continue;

    const src = sourceAttributes
      .map((attribute) => element.attr(attribute))
      .find((value) => Boolean(value));
    if (!src) continue;

    // Handle relative URLs
    try {
      return new URL(src, baseUrl).href;
    } catch {
      return src;
    }
  }

  return null;
}
/**
 * Scrapes a product page and returns only its price.
 *
 * @returns The scraped price, or null when none could be extracted.
 */
export async function scrapePrice(url: string): Promise<ParsedPrice | null> {
  const { price } = await scrapeProduct(url);
  return price;
}

View file

@ -0,0 +1,121 @@
/** A monetary amount extracted from text. */
export interface ParsedPrice {
  /** Amount, rounded to two decimal places. */
  price: number;
  /** Currency code such as `USD`; defaults to `USD` when none is recognised. */
  currency: string;
}

// Currency symbols and codes, mapped to the code reported in ParsedPrice.
// Every code accepted by the patterns below must have an entry here, otherwise
// it silently falls back to USD.
const currencyMap: Record<string, string> = {
  '$': 'USD',
  '€': 'EUR',
  '£': 'GBP',
  '¥': 'JPY',
  '₹': 'INR',
  'CAD': 'CAD',
  'AUD': 'AUD',
  'USD': 'USD',
  'EUR': 'EUR',
  'GBP': 'GBP',
  'JPY': 'JPY',
  'INR': 'INR',
};

// Patterns to match prices in text, tried in order. The amount is captured as
// a digit followed by any run of digits, dots and commas, so that both
// 1,234.56 and 1.234,56 are captured whole; normalizePrice decides which
// separator is the decimal one.
const pricePatterns = [
  // $29.99 or $29,99 or $ 29.99 or €1.234,56
  /(?<currency>[$€£¥₹])\s*(?<price>\d[\d.,]*)/,
  // 29.99 USD or 29,99 EUR (code matched case-insensitively)
  /(?<price>\d[\d.,]*)\s*(?<currency>USD|EUR|GBP|CAD|AUD|JPY|INR)/i,
  // Plain number with optional decimal (fallback)
  /(?<price>\d{1,3}(?:[,.\s]?\d{3})*(?:[.,]\d{2})?)/,
];

/**
 * Parses the first price found in `text`.
 *
 * @param text Free text such as an element's content or attribute value.
 * @returns The amount (greater than zero) and its currency, or null when the
 *          text contains no usable price. The currency is USD when the text
 *          does not name one.
 */
export function parsePrice(text: string): ParsedPrice | null {
  if (!text) return null;

  // Clean up the text
  const cleanText = text.trim().replace(/\s+/g, ' ');

  for (const pattern of pricePatterns) {
    const groups = cleanText.match(pattern)?.groups;
    if (!groups?.price) continue;

    const price = normalizePrice(groups.price);
    if (price === null || price <= 0) continue;

    // Codes are matched case-insensitively, so normalise before the lookup
    // ("eur" -> "EUR"); symbols are unaffected by upper-casing.
    const currencyKey = (groups.currency ?? '$').toUpperCase();
    return { price, currency: currencyMap[currencyKey] ?? 'USD' };
  }

  // Try to extract just a number as fallback (e.g. "0.5", which the plain
  // number pattern above reads as 0 and therefore rejects).
  const numberMatch = cleanText.match(/[\d,]+\.?\d*/);
  if (numberMatch) {
    const price = normalizePrice(numberMatch[0]);
    if (price !== null && price > 0) {
      return { price, currency: 'USD' };
    }
  }

  return null;
}

/**
 * Converts a captured amount string to a number rounded to two decimals.
 *
 * A value ending in a comma plus two digits (and not in a dot plus two
 * digits) is read as European notation (1.234,56); everything else is read
 * as US notation (1,234.56).
 *
 * @returns The amount, or null when the string is not numeric.
 */
function normalizePrice(priceStr: string): number | null {
  if (!priceStr) return null;

  // Remove spaces, and sentence punctuation captured after the amount
  // ("29.99." or "20.00,").
  let normalized = priceStr.replace(/\s/g, '').replace(/[.,]+$/, '');

  // Handle European format (1.234,56) vs US format (1,234.56)
  const hasCommaDecimal = /,\d{2}$/.test(normalized);
  const hasDotDecimal = /\.\d{2}$/.test(normalized);

  if (hasCommaDecimal && !hasDotDecimal) {
    // European format: 1.234,56 -> 1234.56
    normalized = normalized.replace(/\./g, '').replace(',', '.');
  } else {
    // US format or plain number: remove commas
    normalized = normalized.replace(/,/g, '');
  }

  const price = parseFloat(normalized);
  return isNaN(price) ? null : Math.round(price * 100) / 100;
}

/**
 * Collects every price-like token (a currency symbol followed by an amount,
 * or an amount followed by a currency code) from a block of text or HTML.
 *
 * @returns The parsed prices in order of appearance. An amount that was
 *          already seen is skipped, regardless of its currency.
 */
export function extractPricesFromText(html: string): ParsedPrice[] {
  const prices: ParsedPrice[] = [];
  const seen = new Set<number>();

  // Match all price-like patterns in the HTML; the symbols and codes are the
  // same ones parsePrice understands.
  const allMatches = html.matchAll(
    /[$€£¥₹]\s*\d[\d.,]*|\d[\d.,]*\s*(?:USD|EUR|GBP|CAD|AUD|JPY|INR)/gi
  );

  for (const match of allMatches) {
    const parsed = parsePrice(match[0]);
    if (parsed && !seen.has(parsed.price)) {
      seen.add(parsed.price);
      prices.push(parsed);
    }
  }

  return prices;
}
/**
 * Picks the candidate most likely to be the product's current price.
 *
 * - No candidates: null.
 * - One candidate: that candidate, whatever its amount.
 * - Several candidates: the lowest amount among those of at least 0.99; on a
 *   tie the earliest candidate wins. If every candidate is below 0.99, the
 *   first candidate is returned.
 *
 * The input array is not modified.
 */
export function findMostLikelyPrice(prices: ParsedPrice[]): ParsedPrice | null {
  if (prices.length === 0) return null;
  if (prices.length === 1) return prices[0];

  // Very small amounts are unlikely to be product prices.
  const plausible = prices.filter((candidate) => candidate.price >= 0.99);
  if (plausible.length === 0) return prices[0];

  // Lowest plausible amount; the strict comparison keeps the earliest on ties.
  return plausible.reduce((lowest, candidate) =>
    candidate.price < lowest.price ? candidate : lowest
  );
}