mirror of
https://github.com/clucraft/PriceGhost.git
synced 2026-05-30 14:35:16 +02:00
Add human-like behavior to browser scraping
This commit is contained in:
parent
9af18969f3
commit
58ad638641
1 changed file with 19 additions and 5 deletions
|
|
@@ -21,11 +21,13 @@ async function scrapeWithBrowser(url: string): Promise<string> {
|
||||||
'--no-sandbox',
|
'--no-sandbox',
|
||||||
'--disable-setuid-sandbox',
|
'--disable-setuid-sandbox',
|
||||||
'--disable-dev-shm-usage',
|
'--disable-dev-shm-usage',
|
||||||
'--disable-accelerated-2d-canvas',
|
'--disable-blink-features=AutomationControlled',
|
||||||
'--disable-gpu',
|
'--disable-infobars',
|
||||||
'--window-size=1920,1080',
|
'--window-size=1920,1080',
|
||||||
|
'--start-maximized',
|
||||||
],
|
],
|
||||||
executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || undefined,
|
executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || undefined,
|
||||||
|
ignoreDefaultArgs: ['--enable-automation'],
|
||||||
});
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
@@ -40,9 +42,14 @@ async function scrapeWithBrowser(url: string): Promise<string> {
|
||||||
timeout: 45000,
|
timeout: 45000,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Add some human-like behavior
|
||||||
|
await page.mouse.move(100, 200);
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 500));
|
||||||
|
await page.mouse.move(300, 400);
|
||||||
|
|
||||||
// Wait for Cloudflare challenge to complete if present
|
// Wait for Cloudflare challenge to complete if present
|
||||||
// Check if we're on a challenge page and wait for it to resolve
|
// Check if we're on a challenge page and wait for it to resolve
|
||||||
const maxWaitTime = 15000;
|
const maxWaitTime = 20000;
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
while (Date.now() - startTime < maxWaitTime) {
|
while (Date.now() - startTime < maxWaitTime) {
|
||||||
|
|
@@ -53,11 +60,18 @@ async function scrapeWithBrowser(url: string): Promise<string> {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
console.log(`[Browser] Waiting for Cloudflare challenge to complete... (${title})`);
|
console.log(`[Browser] Waiting for Cloudflare challenge to complete... (${title})`);
|
||||||
|
// Move mouse randomly while waiting
|
||||||
|
await page.mouse.move(
|
||||||
|
100 + Math.random() * 500,
|
||||||
|
100 + Math.random() * 400
|
||||||
|
);
|
||||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Additional wait for dynamic content
|
// Scroll down a bit like a human would
|
||||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
// eslint-disable-next-line @typescript-eslint/no-implied-eval
|
||||||
|
await page.evaluate('window.scrollBy(0, 300)');
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||||
|
|
||||||
// Get the full HTML content
|
// Get the full HTML content
|
||||||
const html = await page.content();
|
const html = await page.content();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue