mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-06-26 15:49:39 +02:00
Add Puppeteer fallback for Cloudflare-protected sites
When HTTP requests are blocked with 403 (e.g., B&H Photo's Cloudflare protection), the scraper now automatically retries using a headless Chrome browser via Puppeteer. Also updated Dockerfile to include Chromium dependencies for container deployment. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
7c8ab0721b
commit
c96861fefb
4 changed files with 1415 additions and 36 deletions
|
|
@ -1,5 +1,5 @@
|
|||
# Build stage
|
||||
FROM node:20-alpine AS builder
|
||||
FROM node:20-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
|
@ -16,10 +16,38 @@ COPY . .
|
|||
RUN npm run build
|
||||
|
||||
# Production stage
|
||||
FROM node:20-alpine AS production
|
||||
FROM node:20-slim AS production
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install Chromium dependencies for Puppeteer
|
||||
RUN apt-get update && apt-get install -y \
|
||||
chromium \
|
||||
fonts-liberation \
|
||||
libasound2 \
|
||||
libatk-bridge2.0-0 \
|
||||
libatk1.0-0 \
|
||||
libcups2 \
|
||||
libdbus-1-3 \
|
||||
libdrm2 \
|
||||
libgbm1 \
|
||||
libgtk-3-0 \
|
||||
libnspr4 \
|
||||
libnss3 \
|
||||
libx11-xcb1 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libxss1 \
|
||||
xdg-utils \
|
||||
--no-install-recommends \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set Puppeteer to use installed Chromium
|
||||
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
|
||||
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
|
||||
# Copy package files
|
||||
COPY package*.json ./
|
||||
|
||||
|
|
@ -30,8 +58,10 @@ RUN npm install --omit=dev
|
|||
COPY --from=builder /app/dist ./dist
|
||||
|
||||
# Create non-root user
|
||||
RUN addgroup -g 1001 -S nodejs && \
|
||||
adduser -S nodejs -u 1001
|
||||
RUN groupadd -r nodejs && useradd -r -g nodejs nodejs
|
||||
|
||||
# Change ownership of app directory
|
||||
RUN chown -R nodejs:nodejs /app
|
||||
|
||||
USER nodejs
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue