mirror of
https://github.com/feder-cr/invisible_playwright.git
synced 2026-06-10 08:45:13 +02:00
Compare commits
66 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
12883bb4c7 | ||
|
|
ef86cd57dc | ||
|
|
2410582960 | ||
|
|
df4493d553 | ||
|
|
8ba88958be | ||
|
|
4564b26158 | ||
|
|
036a1a1d5f | ||
|
|
0b53e18e23 | ||
|
|
62cdf626a0 | ||
|
|
5dac302938 | ||
|
|
67b5e7cd5e | ||
|
|
5f546f4d63 | ||
|
|
2dd2224e73 | ||
|
|
610f09d2c2 | ||
|
|
90529ff181 | ||
|
|
8d7b6eafdf | ||
|
|
7260f461bb | ||
|
|
86a04d2d34 | ||
|
|
eec373a719 | ||
|
|
215b8801d7 | ||
|
|
cc7d95c8ae | ||
|
|
8bf72da40c | ||
|
|
e2bcd0cd4c | ||
|
|
6f44e1af38 | ||
|
|
262d388b99 | ||
|
|
b7eda606a2 | ||
|
|
e3b8a42ded | ||
|
|
26fa962d24 | ||
|
|
7b860b7398 | ||
|
|
f2664f96e1 | ||
|
|
369f3f7fdb | ||
|
|
d6c3de7730 | ||
|
|
143aff4bd2 | ||
|
|
ee0fe57ced | ||
|
|
929da150bc | ||
|
|
66c6b09821 | ||
|
|
f208f5262c | ||
|
|
35508595fa | ||
|
|
97a3cdfc17 | ||
|
|
033d0e9b35 | ||
|
|
def731e6ec | ||
|
|
4a71a0142a | ||
|
|
9432e789c6 | ||
|
|
75e6927904 | ||
|
|
f4d42dcac4 | ||
|
|
0375cf3f79 | ||
|
|
62b9030d2a | ||
|
|
60d13a2b6e | ||
|
|
3d8ba0b82c | ||
|
|
413db06690 | ||
|
|
70b6a54dbc | ||
|
|
bfccd61863 | ||
|
|
34aeb9601f | ||
|
|
64eef4daff | ||
|
|
cb3755cdd5 | ||
|
|
9571c3049d | ||
|
|
1701b34688 | ||
|
|
b98455bf8a | ||
|
|
2e0adbde33 | ||
|
|
cf59e98fa9 | ||
|
|
22b1171518 | ||
|
|
acd568f5d3 | ||
|
|
5f0ba5d659 | ||
|
|
a0b61d1abf | ||
|
|
1eb3d5f55a | ||
|
|
567717dfd7 |
56 changed files with 7877 additions and 209 deletions
31
.githooks/pre-push
Normal file
31
.githooks/pre-push
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/sh
|
||||
# Pre-push hook: blocks push if the test suite isn't fully green.
|
||||
#
|
||||
# Enable once with:
|
||||
# git config core.hooksPath .githooks
|
||||
#
|
||||
# Bypass for a known-broken WIP push (NOT for releases):
|
||||
# git push --no-verify
|
||||
# The --no-verify flag is the only escape hatch. Use it sparingly and never
|
||||
# for branches that feed into a release.
|
||||
|
||||
set -e
|
||||
|
||||
echo "[pre-push] running unit + integration tests before push..."
|
||||
|
||||
# Run from this script's directory so it works regardless of where the user
|
||||
# invoked git push from.
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
# Default pyproject addopts skip slow/e2e. That's the gate we want for every
|
||||
# push — fast feedback. e2e is reserved for explicit release runs.
|
||||
if ! python -m pytest -q --tb=short; then
|
||||
echo ""
|
||||
echo "[pre-push] TESTS FAILED — push aborted."
|
||||
echo "[pre-push] Either fix the failure or use 'git push --no-verify' if"
|
||||
echo "[pre-push] you really know what you're doing (NOT for release branches)."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "[pre-push] all tests green — push proceeding."
|
||||
exit 0
|
||||
98
.github/ISSUE_TEMPLATE/01-launch-failure.yml
vendored
Normal file
98
.github/ISSUE_TEMPLATE/01-launch-failure.yml
vendored
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
name: Launch failure
|
||||
description: Browser or wrapper fails to start (install errors, missing deps, profile load fails, never reaches new_page)
|
||||
title: "[launch] "
|
||||
labels: ["bug", "launch-failure"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Use this when the browser never reaches a usable state.
|
||||
If it starts and the bug appears on a site or when clicking something, use the site/action template instead.
|
||||
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
label: Version
|
||||
description: Output of `python -m invisible_playwright version`.
|
||||
placeholder: 0.1.7 (binary firefox-7)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: os
|
||||
attributes:
|
||||
label: OS
|
||||
options:
|
||||
- Windows 10/11 x86_64
|
||||
- Linux x86_64
|
||||
- macOS (unsupported)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python
|
||||
attributes:
|
||||
label: Python
|
||||
placeholder: 3.11.7
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: install_cmd
|
||||
attributes:
|
||||
label: How you installed
|
||||
placeholder: pip install invisible_playwright
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: snippet
|
||||
attributes:
|
||||
label: What you ran
|
||||
description: Stop at the line that errors out. Redact creds.
|
||||
render: python
|
||||
value: |
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
with InvisiblePlaywright(seed=42) as browser:
|
||||
ctx = browser.new_context()
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: traceback
|
||||
attributes:
|
||||
label: Full traceback
|
||||
description: The whole stack trace verbatim. Don't summarize.
|
||||
render: text
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Extra logs
|
||||
description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1`. Optional but speeds things up.
|
||||
render: text
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: tried
|
||||
attributes:
|
||||
label: What you already tried
|
||||
description: Reinstall, clear cache, different Python version, different proxy, etc.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: checkboxes
|
||||
id: confirm
|
||||
attributes:
|
||||
label: Before submitting
|
||||
options:
|
||||
- label: Searched existing issues.
|
||||
required: true
|
||||
- label: On the latest released version.
|
||||
required: true
|
||||
- label: Removed credentials and personal paths from the snippet and logs.
|
||||
required: true
|
||||
167
.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml
vendored
Normal file
167
.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml
vendored
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
name: Site or action bug
|
||||
description: Browser starts fine but a navigation, click, evaluate, or other operation fails or behaves wrong
|
||||
title: "[bug] "
|
||||
labels: ["bug"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
For bugs that happen after the browser is up.
|
||||
If the browser never launches, use the launch failure template.
|
||||
If a fingerprint detector flags the browser, use the stealth detection template.
|
||||
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
label: Version
|
||||
description: Output of `python -m invisible_playwright version`.
|
||||
placeholder: 0.1.7 (binary firefox-7)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: os
|
||||
attributes:
|
||||
label: OS
|
||||
options:
|
||||
- Windows 10/11 x86_64
|
||||
- Linux x86_64
|
||||
- macOS (unsupported)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: python
|
||||
attributes:
|
||||
label: Python
|
||||
placeholder: 3.11.7
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: headless
|
||||
attributes:
|
||||
label: headless=
|
||||
description: Some bugs only repro on Windows headless=True (hidden alt-desktop path).
|
||||
options:
|
||||
- "True"
|
||||
- "False"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: proxy
|
||||
attributes:
|
||||
label: Proxy
|
||||
description: Sites often vary by IP geo (e.g. GDPR consent shows only on UK/EU).
|
||||
options:
|
||||
- No proxy (host network)
|
||||
- Residential, UK/GB
|
||||
- Residential, US
|
||||
- Residential, other country (specify in notes)
|
||||
- Datacenter (specify provider in notes)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: profile
|
||||
attributes:
|
||||
label: Profile dir
|
||||
options:
|
||||
- Fresh each run (no profile_dir)
|
||||
- Persistent profile_dir, reusing across runs
|
||||
- Persistent profile_dir, first run creating it
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: url
|
||||
attributes:
|
||||
label: URL
|
||||
description: The exact URL passed to `page.goto`. Not "the homepage" — the literal string.
|
||||
placeholder: https://id.sky.com/
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: snippet
|
||||
attributes:
|
||||
label: Runnable reproduction
|
||||
description: A complete snippet we can copy, paste, run. Stub creds with placeholders, keep everything else literal.
|
||||
render: python
|
||||
value: |
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
with InvisiblePlaywright(seed=42, headless=True) as browser:
|
||||
ctx = browser.new_context()
|
||||
page = ctx.new_page()
|
||||
page.goto("https://example.com/")
|
||||
# the exact operation that fails:
|
||||
page.click("button:has-text('Accept all')")
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: selector
|
||||
attributes:
|
||||
label: Selector or locator
|
||||
description: The exact string passed to locator/click/frame_locator. Write N/A if not a selector bug.
|
||||
placeholder: page.frame_locator("iframe[id^='sp_message_iframe_']").get_by_text("Accept all")
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: Expected
|
||||
description: What should happen when the snippet runs?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: actual
|
||||
attributes:
|
||||
label: Actual
|
||||
description: What happens instead? Full traceback, error string verbatim, any page.on('crash') firing.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: screenshot
|
||||
attributes:
|
||||
label: Screenshot
|
||||
description: Drag-drop a screenshot if the bug is visual. Optional but useful.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Browser logs
|
||||
description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1 | tail -200`. Redact creds and real IPs.
|
||||
render: text
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: notes
|
||||
attributes:
|
||||
label: Notes
|
||||
description: Anything else, hypotheses, related issues, things you've already tried.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: checkboxes
|
||||
id: confirm
|
||||
attributes:
|
||||
label: Before submitting
|
||||
options:
|
||||
- label: Searched existing issues.
|
||||
required: true
|
||||
- label: On the latest released version.
|
||||
required: true
|
||||
- label: The snippet above runs end-to-end on a clean Python install.
|
||||
required: true
|
||||
- label: Removed credentials, proxy passwords, real IPs, personal file paths.
|
||||
required: true
|
||||
141
.github/ISSUE_TEMPLATE/03-stealth-detection.yml
vendored
Normal file
141
.github/ISSUE_TEMPLATE/03-stealth-detection.yml
vendored
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
name: Stealth detection
|
||||
description: A fingerprint detector flagged the browser as a bot, VM, VPN, anti-detect, tampered, or otherwise non-human
|
||||
title: "[detect] "
|
||||
labels: ["bug", "stealth"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Use this when something detects the browser (Fingerprint Pro, CreepJS, BotD, reCAPTCHA, Cloudflare, sannysoft, etc).
|
||||
Bugs in operations (clicks, navigation) go to the site/action template.
|
||||
Browser failing to start goes to the launch failure template.
|
||||
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
label: Version
|
||||
placeholder: 0.1.7 (binary firefox-7)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: os
|
||||
attributes:
|
||||
label: OS
|
||||
options:
|
||||
- Windows 10/11 x86_64
|
||||
- Linux x86_64
|
||||
- macOS (unsupported)
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: headless
|
||||
attributes:
|
||||
label: headless=
|
||||
options:
|
||||
- "True"
|
||||
- "False"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: proxy
|
||||
attributes:
|
||||
label: Proxy
|
||||
description: Datacenter or wrong-country proxies trip most detectors regardless of the browser. Be honest about what you used.
|
||||
options:
|
||||
- No proxy (host network)
|
||||
- Residential, matching target geo
|
||||
- Residential, different geo than target
|
||||
- Datacenter (specify provider in notes)
|
||||
- Mobile / 4G
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: detector
|
||||
attributes:
|
||||
label: Detector name and URL
|
||||
description: Exact site / service / product that flagged us.
|
||||
placeholder: Fingerprint Pro — https://demo.fingerprint.com/playground
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: scores
|
||||
attributes:
|
||||
label: Detector verdict
|
||||
description: Paste the relevant flags / scores verbatim. For Fingerprint Pro paste `bot`, `vpn`, `virtual_machine`, `tampering*`, `vm_ml_score`, `suspect_score`. For CreepJS the headless / lies / trust scores. For reCAPTCHA v3 the score number.
|
||||
render: text
|
||||
placeholder: |
|
||||
bot: bad
|
||||
vpn: true
|
||||
virtual_machine: true
|
||||
vm_ml_score: 0.74
|
||||
suspect_score: 22
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: screenshot
|
||||
attributes:
|
||||
label: Screenshot of the detector result
|
||||
description: Drag-drop a screenshot of the detector page so we see what you see.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: snippet
|
||||
attributes:
|
||||
label: How you launched
|
||||
description: The InvisiblePlaywright launch + navigation that produced the result above. Redact creds.
|
||||
render: python
|
||||
value: |
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
with InvisiblePlaywright(seed=42, headless=True) as browser:
|
||||
ctx = browser.new_context()
|
||||
page = ctx.new_page()
|
||||
page.goto("https://demo.fingerprint.com/playground")
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: What you expected
|
||||
description: Most detectors will never give a perfect score for any browser. Tell us what threshold you'd accept (e.g. bot=not_detected, vm_ml_score < 0.3).
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: full_report
|
||||
attributes:
|
||||
label: Full detector response
|
||||
description: For Fingerprint Pro paste the JSON from /api/event/v4/ if you have it. For CreepJS paste the full Smart Signals block. Optional but speeds things up a lot.
|
||||
render: json
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: notes
|
||||
attributes:
|
||||
label: Notes
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: checkboxes
|
||||
id: confirm
|
||||
attributes:
|
||||
label: Before submitting
|
||||
options:
|
||||
- label: Searched existing issues.
|
||||
required: true
|
||||
- label: On the latest released version.
|
||||
required: true
|
||||
- label: The detector verdict above is from a real run, not a hypothesis.
|
||||
required: true
|
||||
- label: Removed credentials, real IPs, FpJS visitor_id values, personal file paths from the snippet and full report.
|
||||
required: true
|
||||
79
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
79
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
|
|
@ -1,79 +0,0 @@
|
|||
name: Bug report
|
||||
description: Report a bug in the invisible_playwright Python wrapper
|
||||
title: "[bug] "
|
||||
labels: ["bug"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to file a bug report.
|
||||
|
||||
Before continuing, please:
|
||||
- Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues?q=is%3Aissue) to avoid duplicates.
|
||||
- If the bug is in the **patched Firefox itself** (canvas/WebGL/audio/font spoofing, a detector flagging the browser), open it at [feder-cr/firefox-stealth](https://github.com/feder-cr/firefox-stealth/issues) instead.
|
||||
- **Do not** report security vulnerabilities here — follow [SECURITY.md](https://github.com/feder-cr/invisible_playwright/blob/main/SECURITY.md).
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
label: invisible_playwright version
|
||||
description: Output of `invisible_playwright version`
|
||||
placeholder: "0.1.0 (binary 150.0.1)"
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: os
|
||||
attributes:
|
||||
label: Operating system
|
||||
options:
|
||||
- Windows x86_64
|
||||
- Linux x86_64
|
||||
- Other (please specify in description)
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: python
|
||||
attributes:
|
||||
label: Python version
|
||||
placeholder: "3.11.7"
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: repro
|
||||
attributes:
|
||||
label: Minimal reproduction
|
||||
description: A small, self-contained code snippet that triggers the bug. Strip out anything unrelated.
|
||||
render: python
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: actual
|
||||
attributes:
|
||||
label: Actual behavior
|
||||
description: Include the full error message and traceback if any.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Logs / additional context
|
||||
description: Browser console output, environment variables, proxy config (redact credentials), etc.
|
||||
render: text
|
||||
validations:
|
||||
required: false
|
||||
- type: checkboxes
|
||||
id: confirm
|
||||
attributes:
|
||||
label: Confirmations
|
||||
options:
|
||||
- label: I have searched existing issues and this bug has not been reported.
|
||||
required: true
|
||||
- label: I am on the latest release.
|
||||
required: true
|
||||
- label: I have removed any credentials, proxy passwords, or sensitive data from logs.
|
||||
required: true
|
||||
8
.github/ISSUE_TEMPLATE/config.yml
vendored
8
.github/ISSUE_TEMPLATE/config.yml
vendored
|
|
@ -3,9 +3,9 @@ contact_links:
|
|||
- name: Security vulnerability
|
||||
url: https://github.com/feder-cr/invisible_playwright/security/advisories/new
|
||||
about: Report a security issue privately. Do NOT open a public issue.
|
||||
- name: Bug in the patched Firefox itself (canvas / WebGL / fonts / WebRTC / etc.)
|
||||
url: https://github.com/feder-cr/firefox-stealth/issues
|
||||
about: Spoofing/fingerprint bugs belong in the firefox-stealth repo.
|
||||
- name: Bug in the patched Firefox source (C++, IDL, Juggler JS)
|
||||
url: https://github.com/feder-cr/invisible_firefox/issues
|
||||
about: Source-level patches in the Firefox fork go in the invisible_firefox repo. Detection results (FpJS, CreepJS, etc.) use the stealth detection template here.
|
||||
- name: Question or general discussion
|
||||
url: https://github.com/feder-cr/invisible_playwright/discussions
|
||||
about: For usage questions, ideas, and chat. Bugs and features still go in issues.
|
||||
about: Usage questions, ideas, chat. Bugs and features still go in issues.
|
||||
|
|
|
|||
52
.github/workflows/e2e.yml
vendored
Normal file
52
.github/workflows/e2e.yml
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# e2e.yml — run the FULL browser-driving e2e suite (the 127 @pytest.mark.e2e tests)
|
||||
# on GitHub, on every push/PR to main.
|
||||
#
|
||||
# Why this can run on CI when the drive-gate had to stay light: the drive-gate
|
||||
# launched Firefox in true HEADLESS mode, which is content-process unstable on
|
||||
# the hosted runners (eval-CSP / context-destroyed). The stealth wrapper instead
|
||||
# launches Firefox HEADED on a real display; under `xvfb-run` (a virtual X
|
||||
# server) that's exactly what we get on a headless CI box — stable, and the same
|
||||
# thing webrtc-e2e.yml already relies on.
|
||||
#
|
||||
# Secret-free, so it's safe in public CI: the binary is the PUBLIC firefox-9
|
||||
# release (no token), and the webrtc e2e tests fake a local TCP-only SOCKS proxy. The proxy
|
||||
# realness gate (fppro / smartproxy) is NOT here — it needs secrets and stays a
|
||||
# local pre-release gate.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
name: e2e
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
e2e:
|
||||
name: e2e (linux, xvfb)
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 40
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with: { fetch-depth: 1 }
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
- name: Install wrapper + test deps (+ pinned Playwright)
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install ".[dev]"
|
||||
python -m pip install "playwright==$(cat scripts/playwright_pin.txt)"
|
||||
- name: System deps (xvfb + Firefox runtime libs)
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y xvfb
|
||||
sudo "$(which python)" -m playwright install-deps firefox
|
||||
- name: Fetch the published firefox binary
|
||||
run: echo "FF=$(python -m invisible_playwright fetch | tail -1)" >> "$GITHUB_ENV"
|
||||
- name: Run the full e2e suite under a virtual display
|
||||
run: xvfb-run -a python scripts/run_e2e.py "$FF"
|
||||
106
.github/workflows/firefox-launch-matrix.yml
vendored
Normal file
106
.github/workflows/firefox-launch-matrix.yml
vendored
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
name: firefox-launch-matrix
|
||||
|
||||
# Cross-Windows-edition smoke for the shipped firefox-N binary.
|
||||
# Triggered by issue #22 (firefox-7 SxS mismatch on Win11 build 26200,
|
||||
# reporter `jannusdorfer-create`).
|
||||
#
|
||||
# Runs the exact reporter snippet on every Windows runner GitHub offers,
|
||||
# from a fresh checkout. If any matrix cell fails the same way, the bug
|
||||
# is reproducible on at least one clean-ish environment and we ship a
|
||||
# sidecar mozglue.manifest fix. If all cells pass, the bug is confined
|
||||
# to the reporter's specific environment (Pro/Enterprise GPO, EDR, etc.).
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '.github/workflows/firefox-launch-matrix.yml'
|
||||
|
||||
jobs:
|
||||
smoke:
|
||||
name: launch (${{ matrix.os }}, py${{ matrix.python }})
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [windows-2022, windows-2025, windows-latest]
|
||||
python: ["3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
cache: pip
|
||||
|
||||
- name: Windows edition + build info
|
||||
shell: pwsh
|
||||
run: |
|
||||
$os = Get-CimInstance Win32_OperatingSystem
|
||||
Write-Host "Caption : $($os.Caption)"
|
||||
Write-Host "BuildNumber: $($os.BuildNumber)"
|
||||
Write-Host "OSArch : $($os.OSArchitecture)"
|
||||
Write-Host "Edition : $((Get-CimInstance Win32_OperatingSystem).OperatingSystemSKU)"
|
||||
Write-Host "---"
|
||||
Write-Host "VC++ Redistributables installed:"
|
||||
Get-ItemProperty 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\*' `
|
||||
-ErrorAction SilentlyContinue |
|
||||
Where-Object { $_.DisplayName -like '*Visual C++*Redist*' } |
|
||||
Select-Object DisplayName, DisplayVersion |
|
||||
Format-Table -AutoSize
|
||||
|
||||
- name: Install package from this commit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install .
|
||||
|
||||
- name: Fetch firefox-7 binary
|
||||
run: python -m invisible_playwright fetch
|
||||
|
||||
- name: Verify firefox.exe can launch standalone (the snippet that fails for issue #22)
|
||||
shell: pwsh
|
||||
run: |
|
||||
# The platformdirs path has the duplicated `invisible-playwright` segment
|
||||
# on Windows (user_cache_dir convention).
|
||||
$ffPath = "$env:LOCALAPPDATA\invisible-playwright\invisible-playwright\Cache\firefox-7\firefox.exe"
|
||||
if (-not (Test-Path $ffPath)) {
|
||||
Write-Error "firefox.exe NOT FOUND at $ffPath"
|
||||
exit 1
|
||||
}
|
||||
Write-Host "Launching: $ffPath --version"
|
||||
# NOTE: firefox.exe --version on Windows prints the version but may
|
||||
# return a non-zero exit code (sub-process fork quirk). Check stdout.
|
||||
$output = & $ffPath --version 2>&1 | Out-String
|
||||
Write-Host "Output: $output"
|
||||
if ($output -notmatch 'Mozilla Firefox \d') {
|
||||
Write-Error "firefox.exe --version did not print a Mozilla Firefox version. Output was: $output"
|
||||
exit 1
|
||||
}
|
||||
Write-Host "OK: firefox.exe runs and prints version."
|
||||
|
||||
- name: Run reporter's exact InvisiblePlaywright snippet
|
||||
run: |
|
||||
python -c "
|
||||
import asyncio
|
||||
from invisible_playwright.async_api import InvisiblePlaywright
|
||||
async def main():
|
||||
async with InvisiblePlaywright(seed=9128) as browser:
|
||||
page = await browser.new_page()
|
||||
await page.goto('about:blank')
|
||||
print('OK: page loaded, url =', page.url)
|
||||
asyncio.run(main())
|
||||
"
|
||||
|
||||
- name: Upload diagnostics on failure
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: launch-failure-${{ matrix.os }}-py${{ matrix.python }}
|
||||
path: |
|
||||
${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/firefox.exe
|
||||
${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/mozglue.dll
|
||||
if-no-files-found: warn
|
||||
retention-days: 7
|
||||
402
.github/workflows/release.yml
vendored
Normal file
402
.github/workflows/release.yml
vendored
Normal file
|
|
@ -0,0 +1,402 @@
|
|||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# release.yml — build all 5 patched-Firefox targets at $0 and publish them as
|
||||
# DRAFT GitHub Release assets, named per the wrapper contract (constants.ARCHIVE_NAME).
|
||||
# DRAFT on purpose: a human runs the realness gate and only THEN un-drafts + bumps
|
||||
# BINARY_VERSION. Nothing auto-ships (issue #14 lesson).
|
||||
#
|
||||
# PACKAGING (issue #14: dangling symlinks broke 265 downloads — never again):
|
||||
# Linux → cp -aL (dereference ALL symlinks into real files) + rm dev tools +
|
||||
# strip + sanitize + tar at ROOT, then validate_release.py as a HARD
|
||||
# in-pipeline gate (the exact battle-tested script from the source repo).
|
||||
# Win → mach package; zip the CONTENTS of dist/firefox (clean tree, NOT
|
||||
# dist/bin) so firefox.exe sits at the zip ROOT.
|
||||
# macOS → mach package; ad-hoc codesign the .app; PRESERVE its internal relative
|
||||
# symlinks (a .app legitimately has them — cp -aL would break it); verify
|
||||
# every symlink is relative+internal; tar the bundle. --version self-gate.
|
||||
#
|
||||
# DRIVE GATE (the firefox-8 catcher): after build, every binary is DRIVEN by
|
||||
# Playwright on its native runner (launch via juggler + real page + JS roundtrip,
|
||||
# headless, no screenshot → GPU-free, zero proxy). A juggler-less binary renders
|
||||
# a screenshot fine but is undrivable — only an actual drive catches that. The
|
||||
# proxy realness gate (fppro/webrtc) stays LOCAL — it needs secrets.
|
||||
#
|
||||
# Trigger: push a tag `firefox-N`, or run manually. Hybrid runners, all free.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
name: release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags: ['firefox-*']
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
source_ref:
|
||||
description: 'invisible_firefox ref to build'
|
||||
default: 'stealth/150'
|
||||
release_tag:
|
||||
description: 'release tag to publish the draft under (e.g. firefox-9)'
|
||||
required: true
|
||||
|
||||
env:
|
||||
SOURCE_REPO: feder-cr/invisible_firefox
|
||||
SOURCE_REF: ${{ github.event.inputs.source_ref || 'stealth/150' }}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: build-${{ matrix.leg }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 350
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- leg: linux-x86_64
|
||||
runner: ubuntu-24.04
|
||||
family: linux
|
||||
target: ''
|
||||
rust_target: x86_64-unknown-linux-gnu
|
||||
win_disables: 'no'
|
||||
extra_pkgs: ''
|
||||
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||
- leg: linux-arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
family: linux
|
||||
target: ''
|
||||
rust_target: aarch64-unknown-linux-gnu
|
||||
win_disables: 'no'
|
||||
extra_pkgs: ''
|
||||
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||
- leg: win-x86_64
|
||||
runner: ubuntu-24.04
|
||||
family: win
|
||||
target: x86_64-pc-windows-msvc
|
||||
rust_target: x86_64-pc-windows-msvc
|
||||
win_disables: 'yes'
|
||||
extra_pkgs: 'msitools p7zip-full zip'
|
||||
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||
- leg: macos-arm64
|
||||
runner: macos-15
|
||||
family: mac
|
||||
target: aarch64-apple-darwin
|
||||
rust_target: aarch64-apple-darwin
|
||||
win_disables: 'no'
|
||||
extra_pkgs: ''
|
||||
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||
- leg: macos-x86_64
|
||||
runner: macos-15-intel
|
||||
family: mac
|
||||
target: x86_64-apple-darwin
|
||||
rust_target: x86_64-apple-darwin
|
||||
win_disables: 'no'
|
||||
extra_pkgs: ''
|
||||
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||
steps:
|
||||
- name: Free disk + 16G swap (Linux runners)
|
||||
if: matrix.family != 'mac'
|
||||
run: |
|
||||
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android \
|
||||
/usr/local/share/boost "${AGENT_TOOLSDIRECTORY:-/opt/hostedtoolcache}" 2>/dev/null || true
|
||||
sudo fallocate -l 16G /swapfile && sudo chmod 600 /swapfile && sudo mkswap /swapfile && sudo swapon /swapfile || true
|
||||
|
||||
- name: Checkout patched Firefox source
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
repository: ${{ env.SOURCE_REPO }}
|
||||
ref: ${{ env.SOURCE_REF }}
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
|
||||
- name: Install Linux build tools
|
||||
if: matrix.family != 'mac'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y util-linux binutils ${{ matrix.extra_pkgs }}
|
||||
|
||||
- name: Select Xcode 26.2 + export SDK path (macOS)
|
||||
if: matrix.family == 'mac'
|
||||
run: |
|
||||
sudo xcode-select -s /Applications/Xcode_26.2.app
|
||||
SDKP="$(xcrun --show-sdk-path)"
|
||||
echo "SDK_PATH=$SDKP" >> "$GITHUB_ENV"
|
||||
echo "macOS SDK $(xcrun --sdk macosx --show-sdk-version) at $SDKP"
|
||||
|
||||
- name: Add Rust target
|
||||
run: rustup target add ${{ matrix.rust_target }} || true
|
||||
|
||||
- name: Extend the repo .mozconfig (NO mold; +target/SDK as needed)
|
||||
run: |
|
||||
test -f .mozconfig || { echo "ERROR: no .mozconfig in source"; exit 1; }
|
||||
rm -f mozconfig
|
||||
{
|
||||
echo ""
|
||||
echo "# --- release CI levers for ${{ matrix.leg }} (mold intentionally OFF — it segfaults libxul) ---"
|
||||
echo "ac_add_options --disable-debug-symbols"
|
||||
} >> .mozconfig
|
||||
if [ -n "${{ matrix.target }}" ]; then echo "ac_add_options --target=${{ matrix.target }}" >> .mozconfig; fi
|
||||
if [ "${{ matrix.family }}" = "mac" ]; then echo "ac_add_options --with-macos-sdk=$SDK_PATH" >> .mozconfig; fi
|
||||
if [ "${{ matrix.win_disables }}" = "yes" ]; then
|
||||
{ echo "ac_add_options --disable-default-browser-agent";
|
||||
echo "ac_add_options --disable-maintenance-service";
|
||||
echo "ac_add_options --disable-update-agent"; } >> .mozconfig
|
||||
fi
|
||||
if [ "${{ matrix.family }}" = "mac" ]; then NCPU=$(sysctl -n hw.ncpu); else NCPU=4; fi
|
||||
{ echo "mk_add_options MOZ_PARALLEL_BUILD=$NCPU";
|
||||
echo "mk_add_options MOZ_OBJDIR=@TOPSRCDIR@/obj-rel"; } >> .mozconfig
|
||||
echo "----- final .mozconfig -----"; cat .mozconfig
|
||||
|
||||
- name: Build
|
||||
run: ./mach build
|
||||
|
||||
# ── LINUX: dereference symlinks (issue #14) + strip + sanitize + tar@root + GATE
|
||||
- name: Package + validate (Linux)
|
||||
if: matrix.family == 'linux'
|
||||
run: |
|
||||
set -e
|
||||
DIST=obj-rel/dist/bin
|
||||
STAGING=staging
|
||||
rm -rf "$STAGING"; mkdir -p "$STAGING" out
|
||||
cp -aL "$DIST/." "$STAGING/" # -L: dereference ALL symlinks into real files
|
||||
N=$(find "$STAGING" -type l | wc -l)
|
||||
[ "$N" -eq 0 ] || { echo "ERROR: $N symlinks remain after cp -aL"; exit 1; }
|
||||
for t in xpcshell certutil pk12util rapl; do rm -f "$STAGING/$t"; done
|
||||
# JUGGLER GATE: the binary is undrivable by Playwright without it (see 70-known-bugs)
|
||||
{ [ -e "$STAGING/chrome/juggler.manifest" ] && [ -d "$STAGING/chrome/juggler" ]; } \
|
||||
|| { echo "ERROR: juggler missing from package (chrome/juggler) — Playwright can't drive it"; exit 1; }
|
||||
echo "juggler GATE OK (loose chrome/juggler present)"
|
||||
find "$STAGING" -type f \
|
||||
\( -name '*.so' -o -name firefox -o -name firefox-bin -o -name plugin-container \
|
||||
-o -name pingsender -o -name glxtest -o -name vaapitest -o -name updater \) \
|
||||
-exec strip --strip-debug {} + 2>/dev/null || true
|
||||
STAGING="$STAGING" python3 scripts/linux_sanitize.py || true # no-op in CI (no /home/feder), defensive
|
||||
tar --owner=0 --group=0 --numeric-owner --mtime="2026-01-01 00:00:00 UTC" \
|
||||
-czf "out/${{ matrix.asset }}" -C "$STAGING" . # firefox at ROOT
|
||||
echo "=== HARD GATE: scripts/validate_release.py (the issue-#14 protector) ==="
|
||||
python3 scripts/validate_release.py --linux "out/${{ matrix.asset }}" --linux-only
|
||||
ls -la out/
|
||||
|
||||
# ── WINDOWS (cross): zip the CLEAN dist/firefox tree, firefox.exe at root
|
||||
- name: Package (Windows cross)
|
||||
if: matrix.family == 'win'
|
||||
run: |
|
||||
set -e
|
||||
# Do NOT swallow a mach failure: `./mach package || echo` lets set -e pass
|
||||
# and would fall through to a stale tree. A release MUST come from the clean
|
||||
# dist/firefox; dist/bin is the dev tree (cruft + loose juggler that masked
|
||||
# the firefox-7/8 packaging bugs), never acceptable for a release.
|
||||
./mach package
|
||||
[ -f obj-rel/dist/firefox/firefox.exe ] \
|
||||
|| { echo "ERROR: mach package did not produce a clean dist/firefox tree"; exit 1; }
|
||||
WIN_APP=obj-rel/dist/firefox
|
||||
echo "packaging from: $WIN_APP"
|
||||
# JUGGLER GATE: omni.ja must carry juggler (else Playwright can't drive it)
|
||||
[ -f "$WIN_APP/omni.ja" ] || { echo "ERROR: no omni.ja in $WIN_APP"; exit 1; }
|
||||
python3 -c "import zipfile,sys; sys.exit(0 if any('juggler' in n.lower() for n in zipfile.ZipFile('$WIN_APP/omni.ja').namelist()) else 1)" \
|
||||
|| { echo "ERROR: juggler missing from $WIN_APP/omni.ja — Playwright can't drive it"; exit 1; }
|
||||
echo "juggler GATE OK (win)"
|
||||
mkdir -p out
|
||||
( cd "$WIN_APP" && zip -qr "$GITHUB_WORKSPACE/out/${{ matrix.asset }}" . ) # firefox.exe at zip ROOT
|
||||
ls -la out/
|
||||
|
||||
# ── macOS: package .app, ad-hoc sign, verify relative-internal symlinks, --version gate, tar
|
||||
- name: Package + validate (macOS)
|
||||
if: matrix.family == 'mac'
|
||||
run: |
|
||||
set -e
|
||||
./mach package
|
||||
APP="$(find obj-rel/dist -maxdepth 2 -name '*.app' -type d | head -1)"
|
||||
[ -n "$APP" ] || { echo "ERROR: no .app produced"; exit 1; }
|
||||
echo "built app: $APP"
|
||||
# JUGGLER GATE: the .app's omni.ja must carry juggler (else Playwright can't drive it)
|
||||
python3 -c "import zipfile,sys,glob; jas=glob.glob('$APP/Contents/Resources/omni.ja')+glob.glob('$APP/Contents/Resources/browser/omni.ja'); sys.exit(0 if jas and any(any('juggler' in n.lower() for n in zipfile.ZipFile(j).namelist()) for j in jas) else 1)" \
|
||||
|| { echo "ERROR: juggler missing from .app omni.ja — Playwright can't drive it"; exit 1; }
|
||||
echo "juggler GATE OK (mac)"
|
||||
codesign --force --deep --sign - --timestamp=none "$APP"
|
||||
codesign --verify --deep --strict --verbose=2 "$APP"
|
||||
echo "=== --version GATE ==="
|
||||
"$APP/Contents/MacOS/firefox" --version
|
||||
echo "=== critical files present ==="
|
||||
for need in "Contents/MacOS/firefox" "Contents/Info.plist"; do
|
||||
[ -e "$APP/$need" ] || { echo "ERROR: missing $need"; exit 1; }
|
||||
done
|
||||
echo "=== Info.plist well-formed + required keys (a malformed plist → Finder 'damaged') ==="
|
||||
plutil -lint "$APP/Contents/Info.plist"
|
||||
for key in CFBundleExecutable CFBundleIdentifier CFBundleShortVersionString; do
|
||||
plutil -extract "$key" raw -o - "$APP/Contents/Info.plist" >/dev/null \
|
||||
|| { echo "ERROR: Info.plist missing $key"; exit 1; }
|
||||
done
|
||||
EXEC="$(plutil -extract CFBundleExecutable raw -o - "$APP/Contents/Info.plist")"
|
||||
[ -e "$APP/Contents/MacOS/$EXEC" ] \
|
||||
|| { echo "ERROR: CFBundleExecutable '$EXEC' has no matching binary in Contents/MacOS"; exit 1; }
|
||||
echo "=== verify NO absolute symlinks in the .app (relative-internal ones are fine) ==="
|
||||
BAD="$(find "$APP" -type l -print0 | xargs -0 -I{} sh -c 't=$(readlink "{}"); case "$t" in /*) echo "{} -> $t";; esac')"
|
||||
[ -z "$BAD" ] || { echo "ERROR: absolute symlinks in .app (break on user machines):"; echo "$BAD" | head -5; exit 1; }
|
||||
echo "mac .app OK: critical files present, no absolute symlinks"
|
||||
STABLE="$(dirname "$APP")/Firefox.app"
|
||||
[ "$APP" = "$STABLE" ] || mv "$APP" "$STABLE"
|
||||
mkdir -p out
|
||||
tar -czf "out/${{ matrix.asset }}" -C "$(dirname "$STABLE")" Firefox.app # preserves internal symlinks
|
||||
ls -la out/
|
||||
|
||||
- name: Upload build artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: asset-${{ matrix.leg }}
|
||||
path: out/${{ matrix.asset }}
|
||||
if-no-files-found: error
|
||||
retention-days: 7
|
||||
|
||||
# DRIVE GATE — the firefox-8 catcher. A raw `firefox --screenshot` proves
|
||||
# nothing about automation: a juggler-less binary renders fine and ships
|
||||
# broken (firefox-8 did exactly that). So we DRIVE every binary the way users
|
||||
# will: Playwright launches it over the juggler pipe, loads a real page, and
|
||||
# round-trips JS. A binary missing/broken juggler throws TargetClosedError
|
||||
# here and the release never publishes. Headless, NO screenshot → GPU-free,
|
||||
# so it can't false-fail on the GPU-less hosted runners. Zero proxy / zero
|
||||
# secrets → safe in public CI (the proxy realness gate stays local, by design).
|
||||
# Each leg runs on its NATIVE runner so we test the real artifact, not a cross
|
||||
# surrogate. Playwright is pinned to a version validated against this build's
|
||||
# juggler; bump it in lockstep when the juggler is re-synced from upstream.
|
||||
gate:
|
||||
name: gate-${{ matrix.leg }}
|
||||
needs: build
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 25
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# `extra: --full` adds the mouse/keyboard/canvas/navsurface interaction
|
||||
# checks. Only on linux-x86_64 (historically the most reliable hosted
|
||||
# runner): the interaction code is platform-identical JS (omni.ja), so
|
||||
# one reliable full run catches a firefox-2-class regression for all
|
||||
# platforms. The other legs run SMOKE (launch+http+UA+webdriver) — the
|
||||
# firefox-8/juggler catcher — which is robust even on the flaky
|
||||
# windows-latest runner. See scripts/ci_drive_gate.py.
|
||||
- leg: linux-x86_64
|
||||
runner: ubuntu-24.04
|
||||
kind: linux
|
||||
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||
extra: '--full'
|
||||
- leg: linux-arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
kind: linux
|
||||
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||
extra: ''
|
||||
- leg: win-x86_64
|
||||
runner: windows-latest
|
||||
kind: win
|
||||
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||
extra: ''
|
||||
- leg: macos-arm64
|
||||
runner: macos-15
|
||||
kind: mac
|
||||
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||
extra: ''
|
||||
- leg: macos-x86_64
|
||||
runner: macos-15-intel
|
||||
kind: mac
|
||||
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||
extra: ''
|
||||
steps:
|
||||
- name: Checkout wrapper (for scripts/ci_drive_gate.py)
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with: { fetch-depth: 1 }
|
||||
- name: Download asset
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
name: asset-${{ matrix.leg }}
|
||||
path: art
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
- name: Install Playwright driver (no bundled browser — we override executable_path)
|
||||
# Pin from a SINGLE source (scripts/playwright_pin.txt) so release.yml and
|
||||
# verify-assets.yml can't drift to different versions. The drive gate then
|
||||
# ENFORCES playwright↔juggler compatibility: an incompatible pin fails the
|
||||
# launch/drive (TargetClosedError / protocol error) and nothing publishes.
|
||||
# Bump the pin file in lockstep when the juggler is re-synced from upstream.
|
||||
shell: bash
|
||||
run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)"
|
||||
- name: Linux system deps for headless firefox
|
||||
if: matrix.kind == 'linux'
|
||||
run: sudo "$(which python)" -m playwright install-deps firefox
|
||||
- name: Extract + locate firefox binary
|
||||
shell: bash
|
||||
run: |
|
||||
set -e
|
||||
mkdir -p ff
|
||||
A="art/${{ matrix.asset }}"
|
||||
case "${{ matrix.kind }}" in
|
||||
win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";;
|
||||
linux) tar xzf "$A" -C ff; EXE="ff/firefox";;
|
||||
mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";;
|
||||
esac
|
||||
[ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; }
|
||||
chmod +x "$EXE" 2>/dev/null || true
|
||||
echo "FF_EXE=$EXE" >> "$GITHUB_ENV"
|
||||
echo "located: $EXE"
|
||||
- name: DRIVE GATE — Playwright launch via juggler + real page (+ interaction on --full)
|
||||
shell: bash
|
||||
run: python scripts/ci_drive_gate.py "$FF_EXE" ${{ matrix.extra }}
|
||||
|
||||
publish:
|
||||
name: publish-draft-release
|
||||
needs: [build, gate]
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Download all build assets
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with: { pattern: asset-*, path: dl, merge-multiple: true }
|
||||
- name: Assert all 5 target archives present (no silent partial release)
|
||||
run: |
|
||||
cd dl
|
||||
EXPECTED="
|
||||
firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||
firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||
firefox-150.0.1-stealth-win-x86_64.zip
|
||||
firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||
firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||
"
|
||||
for a in $EXPECTED; do
|
||||
[ -s "$a" ] || { echo "ERROR: missing/empty release asset: $a (a build leg silently dropped out?)"; exit 1; }
|
||||
done
|
||||
echo "all 5 target archives present"
|
||||
- name: Generate checksums.txt
|
||||
run: |
|
||||
cd dl; ls -la
|
||||
# explicit glob — never include checksums.txt itself (the `*`-includes-itself trap)
|
||||
sha256sum firefox-150.0.1-stealth-* > checksums.txt
|
||||
echo "----- checksums.txt -----"; cat checksums.txt
|
||||
- name: Resolve release tag
|
||||
id: tag
|
||||
run: |
|
||||
TAG="${{ github.event.inputs.release_tag }}"
|
||||
[ -z "$TAG" ] && TAG="${GITHUB_REF_NAME}"
|
||||
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
||||
echo "publishing DRAFT release for tag: $TAG"
|
||||
- name: Create DRAFT release with all assets
|
||||
uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2
|
||||
with:
|
||||
tag_name: ${{ steps.tag.outputs.tag }}
|
||||
name: invisible_firefox (150.0.1) rev ${{ steps.tag.outputs.tag }}
|
||||
draft: true
|
||||
prerelease: false
|
||||
fail_on_unmatched_files: true
|
||||
files: |
|
||||
dl/*.tar.gz
|
||||
dl/*.zip
|
||||
dl/checksums.txt
|
||||
body: |
|
||||
Patched Firefox 150.0.1 — built on GitHub Actions ($0, no mold).
|
||||
Targets: linux-x86_64, linux-arm64, win-x86_64, macos-arm64, macos-x86_64.
|
||||
|
||||
DRAFT — do not publish until validate_release.py + realness gate pass on all archives.
|
||||
|
||||
macOS: ad-hoc signed (not notarized). After download run:
|
||||
xattr -dr com.apple.quarantine Firefox.app
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
111
.github/workflows/verify-assets.yml
vendored
Normal file
111
.github/workflows/verify-assets.yml
vendored
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# verify-assets.yml — re-runnable DRIVE GATE for an EXISTING release's assets.
|
||||
#
|
||||
# release.yml drive-gates every binary it builds. This does the same drive test
|
||||
# WITHOUT rebuilding: it downloads a release's already-published assets (works on
|
||||
# DRAFT releases too via GITHUB_TOKEN) and drives each one on its native runner.
|
||||
#
|
||||
# Use it to:
|
||||
# • drive-test a release that was built before the in-pipeline gate existed
|
||||
# (e.g. firefox-9, built on the old release.yml), or
|
||||
# • re-verify any shipped release on demand (regression check).
|
||||
#
|
||||
# Same single-source-of-truth drive logic as release.yml: scripts/ci_drive_gate.py.
|
||||
# Headless, no screenshot → GPU-free. Zero proxy / zero secrets.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
name: verify-assets
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_tag:
|
||||
description: 'release tag whose assets to drive-test (e.g. firefox-9)'
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
# write (not read) is required: GitHub only exposes DRAFT releases to tokens
|
||||
# with push access. With contents:read, `gh release download` on a draft tag
|
||||
# 404s ("release not found"). This workflow only READS assets — the elevated
|
||||
# scope is solely to make draft releases visible to GITHUB_TOKEN.
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
drive:
|
||||
name: drive-${{ matrix.leg }}
|
||||
runs-on: ${{ matrix.runner }}
|
||||
timeout-minutes: 25
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# --full (interaction) only on the reliable linux-x86_64 leg; others run
|
||||
# the robust SMOKE drive. Same rationale as release.yml's gate.
|
||||
- leg: linux-x86_64
|
||||
runner: ubuntu-24.04
|
||||
kind: linux
|
||||
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||
extra: '--full'
|
||||
- leg: linux-arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
kind: linux
|
||||
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||
extra: ''
|
||||
- leg: win-x86_64
|
||||
runner: windows-latest
|
||||
kind: win
|
||||
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||
extra: ''
|
||||
- leg: macos-arm64
|
||||
runner: macos-15
|
||||
kind: mac
|
||||
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||
extra: ''
|
||||
- leg: macos-x86_64
|
||||
runner: macos-15-intel
|
||||
kind: mac
|
||||
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||
extra: ''
|
||||
steps:
|
||||
- name: Checkout wrapper (for scripts/ci_drive_gate.py)
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with: { fetch-depth: 1 }
|
||||
- name: Download the release asset (draft releases included)
|
||||
shell: bash
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -e
|
||||
mkdir -p art
|
||||
gh release download "${{ github.event.inputs.release_tag }}" \
|
||||
--repo "${{ github.repository }}" \
|
||||
--pattern "${{ matrix.asset }}" \
|
||||
--dir art
|
||||
ls -la art/
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
with: { python-version: '3.11' }
|
||||
- name: Install Playwright driver (no bundled browser — we override executable_path)
|
||||
# Single-source pin (see release.yml); the drive gate enforces juggler compat.
|
||||
shell: bash
|
||||
run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)"
|
||||
- name: Linux system deps for headless firefox
|
||||
if: matrix.kind == 'linux'
|
||||
run: sudo "$(which python)" -m playwright install-deps firefox
|
||||
- name: Extract + locate firefox binary
|
||||
shell: bash
|
||||
run: |
|
||||
set -e
|
||||
mkdir -p ff
|
||||
A="art/${{ matrix.asset }}"
|
||||
case "${{ matrix.kind }}" in
|
||||
win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";;
|
||||
linux) tar xzf "$A" -C ff; EXE="ff/firefox";;
|
||||
mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";;
|
||||
esac
|
||||
[ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; }
|
||||
chmod +x "$EXE" 2>/dev/null || true
|
||||
echo "FF_EXE=$EXE" >> "$GITHUB_ENV"
|
||||
echo "located: $EXE"
|
||||
- name: DRIVE GATE — Playwright launch via juggler + real page (+ interaction on --full)
|
||||
shell: bash
|
||||
run: python scripts/ci_drive_gate.py "$FF_EXE" ${{ matrix.extra }}
|
||||
47
.github/workflows/webrtc-e2e.yml
vendored
Normal file
47
.github/workflows/webrtc-e2e.yml
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
name: webrtc-e2e
|
||||
|
||||
# Live WebRTC realness check against the shipped patched binary.
|
||||
#
|
||||
# Manual (workflow_dispatch) on purpose: it needs a firefox-N binary that
|
||||
# carries the WebRTC fixes (synthetic srflx in genuine nICEr form + the
|
||||
# default-route fallback behind a proxy). Run it after publishing such a
|
||||
# binary — it is the release gate for "WebRTC looks real behind a proxy".
|
||||
# Until that binary ships, test_not_blocked_behind_tcp_only_socks is EXPECTED
|
||||
# to fail (the old binary is fully blocked behind a SOCKS proxy), which is the
|
||||
# whole point of the gate.
|
||||
#
|
||||
# No smartproxy / credentials: the "behind a proxy" condition is faked by an
|
||||
# in-process TCP-only SOCKS5 server (refuses UDP ASSOCIATE) and the egress IP
|
||||
# is injected as an RFC 5737 TEST-NET address. Fully self-contained.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
webrtc-e2e:
|
||||
name: webrtc realness (ubuntu, py3.12)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: pip
|
||||
|
||||
- name: Install package + dev extras
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -e ".[dev]"
|
||||
|
||||
- name: Fetch the patched Firefox binary
|
||||
run: python -m invisible_playwright fetch
|
||||
|
||||
- name: Resolve binary path
|
||||
run: echo "STEALTHFOX_E2E_BINARY=$(python -m invisible_playwright path)" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run WebRTC realness e2e (xvfb for the headless Firefox)
|
||||
run: |
|
||||
sudo apt-get update && sudo apt-get install -y xvfb
|
||||
xvfb-run -a pytest tests/test_webrtc_realness.py -m e2e -o addopts="" -v -rs
|
||||
68
CHANGELOG.md
68
CHANGELOG.md
|
|
@ -6,10 +6,74 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- `timezone="auto"`: the browser timezone is auto-derived from the egress IP. By default (no explicit timezone) it ALWAYS resolves — from the proxy egress when a proxy is set, otherwise from the host's own public IP — so the zone can never disagree with the IP (the classic `timezone_mismatch` signal). An explicit `"Area/City"` is the only way to force a specific zone. On lookup failure: with a proxy, the launch raises an error (no silent host-TZ fallback behind a foreign proxy); without a proxy, it falls back to the host TZ so a transient lookup failure can't break the launch.
|
||||
- The egress IP is mapped to its IANA zone with an offline mmdb (`daijro/geoip-all-in-one`). It auto-updates against the upstream weekly rebuild: the file is cached locally, re-checked after `GEOIP_REFRESH_DAYS` (7 days), older copies are pruned, and a stale cache is reused when offline. Set `STEALTHFOX_GEOIP_MMDB` to point at your own `.mmdb` and skip the download.
|
||||
- `resolve_session_timezone(timezone, proxy)` and `ensure_geoip_mmdb()` re-exported at the package root (plus `GeoTimezoneError`) so integrations that own their launch can reproduce the resolution.
|
||||
- Unit tests: `tests/test_geo.py` (37 tests) and `tests/test_geoip_update.py` (freshness / auto-update / offline fallback).
|
||||
|
||||
### Changed
|
||||
- New runtime dependencies: `requests[socks]` (SOCKS egress lookup), `maxminddb` (mmdb reader), `tzdata` (IANA database for `zoneinfo`, which Windows lacks).
|
||||
|
||||
## [0.2.0] - 2026-05-28
|
||||
|
||||
### Added
|
||||
- Public config helpers in `invisible_playwright.config`: `get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display)` returns a complete `firefox_user_prefs` dict; `get_default_args()` returns the baseline CLI args list (currently empty). Both also re-exported at the package root.
|
||||
- `invisible_playwright.ensure_binary` re-exported at the package root for parity with the `cloakbrowser.download.ensure_binary` integration pattern that downstream projects (Skyvern, Crawlee, agno) already expect.
|
||||
- These helpers let third-party fetchers (changedetection.io plugins, Crawlee `BrowserPool` subclasses, agno toolkits) drive `playwright.firefox.launch(executable_path=..., firefox_user_prefs=...)` themselves without depending on the `InvisiblePlaywright` context manager owning the lifecycle.
|
||||
- `tests/unit/test_config_public.py`: 14 unit tests covering deterministic seed, locale / timezone / pin / extra_prefs / humanize variations, and round-trip via the public namespace.
|
||||
|
||||
### Unchanged
|
||||
- `InvisiblePlaywright` context manager surface is identical (backwards compatible).
|
||||
- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build.
|
||||
|
||||
## [0.1.8] - 2026-05-23
|
||||
|
||||
### Fixed
|
||||
- [#20](https://github.com/feder-cr/invisible_playwright/issues/20): cross-origin iframes were unreachable from Playwright. `element_handle.content_frame()` returned `None`, `frame.evaluate()` threw cross-origin SOP errors, and `frame_locator(...).click()` timed out even with `force=True`. Root cause: FF150 defaults `fission.webContentIsolationStrategy=1` (`IsolateEverything`), which site-isolates every cross-origin iframe into a separate `webIsolated` content process even when `fission.autostart=False`. The parent's Juggler FrameTree then has a Frame placeholder with no docShell and no URL — every protocol op that needs to enter the iframe fails. Fix: pin `fission.webContentIsolationStrategy=0` (`IsolateNothing`) in the baseline prefs. The setting can be flipped back per session via `extra_prefs={"fission.webContentIsolationStrategy": 1}`.
|
||||
|
||||
### Added
|
||||
- `tests/test_cross_origin_iframe.py`: 4 unit + 5 e2e regression sentinels for cross-origin iframe interaction. The e2e layer runs entirely offline against two local HTTP servers on `127.0.0.1` (two ports = two SOP origins) and covers `page.frames` URL tracking, `content_frame()`, `frame.evaluate()`, `frame_locator(...).locator(...)`, and end-to-end `dispatch_event("click")` for plain, sandboxed and titled iframes. A future FF upgrade or fingerprint A/B that flips the pref back to `1` will fail the suite before shipping.
|
||||
|
||||
### Unchanged
|
||||
- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build was needed.
|
||||
|
||||
## [0.1.7] - 2026-05-21
|
||||
|
||||
### Fixed
|
||||
- [#18](https://github.com/feder-cr/invisible_playwright/issues/18): Tab crash when running with `headless=True` on Windows on pages that trigger cross-process navigation. Two separate bugs that only manifested together: (1) the Chromium content sandbox at default level 6 puts content processes on `kAlternateWinstation`, but the wrapper hides the browser window on its own alt-desktop (`CreateDesktop` for headless on Windows). Mismatched desktops → cross-process navigations couldn't reparent windows → content process exits cleanly and Playwright fires `page.on('crash')`. (2) The canvas2d `getImageData` stealth spoof wrote to a read-only mapped `DataSourceSurface`. On GPU-backed canvases that memory is write-protected → segfault during the final `getImageData` at page unload. Wrapper now sets `security.sandbox.content.level=4` in the alt-desktop workaround set, and `firefox-7` ships the source fix that moves the noise to the JS array's writable backing buffer.
|
||||
|
||||
### Changed
|
||||
- `BINARY_VERSION` bumped from `firefox-5` to `firefox-7`. `firefox-6` was rolled back when its partial fix turned out to be wrong (the iframe-burst hypothesis was a dead end; bisection in the evening found the real two-bug cause documented above).
|
||||
|
||||
## [0.1.6] - 2026-05-21
|
||||
|
||||
### Added
|
||||
- `profile_dir=` kwarg on `InvisiblePlaywright` (sync + async). When set, the session uses `firefox.launch_persistent_context()` so cookies, localStorage, sessionStorage, extensions, cache and prefs are kept on disk between runs. `__enter__` returns a `BrowserContext` directly: `with InvisiblePlaywright(profile_dir=p) as ctx: ctx.new_page()`. Pair with a stable `seed=` to also pin the fingerprint identity across runs. First run creates the dir; subsequent runs reuse it.
|
||||
|
||||
### Fixed
|
||||
- `launch_persistent_context(timezone_id="…")` no longer times out at 180s. Root cause: `juggler/content/main.js` calls `docShell.overrideTimezone(...)` on every navigation; the patched Firefox up to firefox-4 didn't expose that IDL method on `nsIDocShell`, so the call threw `TypeError: docShell.overrideTimezone is not a function`. On the non-persistent path the error fired *after* launch and was harmless; on the persistent path it blocked the launch handshake. `firefox-5` ships the C++ method (see `patch.md` section 19); this release removes the firefox-4 era Python workaround that was filtering `locale`/`timezone_id` out of the persistent context kwargs.
|
||||
|
||||
### Changed
|
||||
- `BINARY_VERSION` bumped from `firefox-4` to `firefox-5`. The Python source delta is JS/Python only; the new Firefox build adds 50 lines of C++ in `docshell/base/nsIDocShell.idl` + `nsDocShell.cpp`.
|
||||
|
||||
## [0.1.5] - 2026-05-20
|
||||
|
||||
### Fixed
|
||||
- [#15](https://github.com/feder-cr/invisible_playwright/pull/15): `python -m invisible_playwright fetch` raised `RuntimeError: no SHA256 for firefox-150.0.1-stealth-linux-x86_64.tar.gz in checksums.txt` for every user because the parser kept the `*` binary-mode prefix that `sha256sum` writes in front of filenames. Now `.lstrip("*")` is applied to the key. Reporter + patch: [@LostBoxArt](https://github.com/LostBoxArt). Unrelated to the `firefox-N` binary; existing caches still work, only first-time fetches were broken.
|
||||
|
||||
## [0.1.4] - 2026-05-20
|
||||
|
||||
### Fixed
|
||||
- [#13](https://github.com/feder-cr/invisible_playwright/issues/13): every page that threw an uncaught JS error (e.g. bunny.net) crashed the Playwright client with `TypeError: Cannot read properties of undefined (reading 'url')`. Root cause: upstream Playwright Juggler added a required `location` field to the `Page.uncaughtError` event in the 2026-05-07 roll ([microsoft/playwright@c8604ec](https://github.com/microsoft/playwright/commit/c8604ecd97)); our fork was carrying the pre-roll schema in every `firefox-N` build. Fix matches upstream — Runtime.js builds the `errorLocation`, PageAgent.js forwards it on both worker and runtime error paths, Protocol.js declares the schema field. Reporter: [@dionorgua](https://github.com/dionorgua).
|
||||
|
||||
### Changed
|
||||
- `BINARY_VERSION` bumped from `firefox-3` to `firefox-4`. JS-only change inside `chrome/juggler/`; `xul.dll` and `firefox.exe` are byte-identical to `firefox-3`.
|
||||
|
||||
## [0.1.3] - 2026-05-19
|
||||
|
||||
### Changed
|
||||
- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible-firefox#stealth/150](https://github.com/feder-cr/invisible-firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`).
|
||||
- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible_firefox#stealth/150](https://github.com/feder-cr/invisible_firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`).
|
||||
- The patched Firefox archive now ships the **proper C++ implementation** of `windowUtils.jugglerSendMouseEvent`, replacing the JS shim from 0.1.2.
|
||||
|
||||
### C++ fixes landed in this release
|
||||
|
|
@ -20,7 +84,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|||
- **C7 (partial)**: storage stub for `nsIDocShell.languageOverride`. Workaround `InvisiblePlaywright(locale="")` recommended until full BC FIELD port lands.
|
||||
|
||||
### Verified
|
||||
- Both archives built from same source: feder-cr/invisible-firefox commit `68906f1f9c55`.
|
||||
- Both archives built from same source: feder-cr/invisible_firefox commit `68906f1f9c55`.
|
||||
- Windows + Linux smoke suite green: launch, `ctx.new_page()`, `page.mouse.{move,down,up,click,wheel}`, `navigator.webdriver=false`, sannysoft 32/33 PASS.
|
||||
- SHA256 published in `checksums.txt` on the `firefox-3` release.
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ Thanks for your interest in improving this project. Contributions are welcome vi
|
|||
- **Bug?** Open a [bug report](https://github.com/feder-cr/invisible_playwright/issues/new?template=bug_report.yml).
|
||||
- **Idea?** Open a [feature request](https://github.com/feder-cr/invisible_playwright/issues/new?template=feature_request.yml).
|
||||
- **Security issue?** Do **not** open a public issue — see [SECURITY.md](SECURITY.md).
|
||||
- **The C++ patches** live in the companion repo [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there.
|
||||
- **The C++ patches** live in the companion repo [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there.
|
||||
|
||||
## Scope
|
||||
|
||||
|
|
@ -18,7 +18,7 @@ This repository ships the **Python wrapper** (`invisible_playwright`) around a p
|
|||
- Binary download/caching, CLI, proxy plumbing
|
||||
- Tests, docs, examples, packaging
|
||||
|
||||
Out of scope (belongs in `invisible-firefox`):
|
||||
Out of scope (belongs in `invisible_firefox`):
|
||||
|
||||
- Changes to the Firefox C++ source
|
||||
- New preferences exposed by the patched binary
|
||||
|
|
@ -65,7 +65,7 @@ Before opening, please:
|
|||
|
||||
- Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues) — the bug may already be tracked.
|
||||
- Reproduce on the **latest release** if possible.
|
||||
- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible-firefox` instead.
|
||||
- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible_firefox` instead.
|
||||
|
||||
Include:
|
||||
|
||||
|
|
|
|||
87
README.md
87
README.md
|
|
@ -6,56 +6,26 @@
|
|||
[](https://www.mozilla.org/firefox/)
|
||||
[](https://github.com/feder-cr/invisible_playwright/releases)
|
||||
[](https://github.com/feder-cr/invisible_playwright/stargazers)
|
||||
[](https://github.com/feder-cr/invisible_firefox/releases/tag/usage-counter)
|
||||
|
||||
[](https://it.linkedin.com/in/federico-elia-5199951b6)
|
||||
|
||||
A patched Firefox **100% Playwright-compatible** that passes the hardest browser-fingerprint detectors in the wild.
|
||||
**Stealth Firefox that passes every bot detection test. Drop-in Playwright replacement, fingerprint patched at the C++ level, not a JavaScript shim.**
|
||||
|
||||

|
||||
|
||||
## Results
|
||||
|
||||
### Google reCAPTCHA v3 - **0.90 / 1.0**
|
||||
|
||||
Top-tier score. Google classifies the session as "very likely a human". Most anti-detect stacks plateau around 0.3-0.7.
|
||||
|
||||

|
||||
|
||||
### Fingerprint Pro - **bot: not detected, VPN: false, tampering: false, dev tools: not detected**
|
||||
|
||||
FingerprintJS Pro's full Smart Signals battery flips every flag to "Not detected". Browser correctly identified as Firefox 150 on Windows 10. Confidence score 0.9.
|
||||
|
||||

|
||||
|
||||
### CreepJS - **0 lies**, fingerprint is internally coherent
|
||||
|
||||
No contradictions between headless hints, spoofed values, and real rendering output. That "0 lies" is what kills most anti-detect browsers: one inconsistency (e.g. Chrome UA + Firefox WebGL) and the trust score collapses.
|
||||
|
||||

|
||||
|
||||
### BrowserLeaks WebRTC - **no public IP leak**
|
||||
|
||||
WebRTC srflx address is the proxy egress IP; host candidates are private LAN. The real public IP never leaks via STUN, even on pages that configure their own ICE servers. Stock Firefox exposes an mDNS hostname (e.g. `abc-1234.local`) as a host ICE candidate, which is itself a stable per-session signal detectors fingerprint. invisible_playwright replaces host candidates with synthetic private-LAN IPs that match the spoofed network, removing the mDNS tell.
|
||||
|
||||

|
||||
|
||||
### bot.sannysoft.com - **all checks pass**
|
||||
|
||||
Every row green: WebDriver not present, Chrome-only properties absent, plugin/mime/languages arrays coherent, permissions API correct, iframe/source window checks pass.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## Why it's powerful
|
||||
|
||||
**Most anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems:
|
||||
|
||||
**Most other anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems:
|
||||
|
||||
1. **JS patches are detectable.** Anti-bots enumerate native function `.toString()`, check descriptor configurability, compare property enumeration order, watch for prototype mutations. Every patch leaves a fingerprint of its own. CreepJS has an entire battery of "lies detectors" built around this.
|
||||
2. **Chromium itself is now suspect.** Residential-proxy bot traffic is overwhelmingly Chromium-based, so detectors weight anything Chromium-shaped as risky by default. Chromium-based forks inherit Chrome's open-source layers (BoringSSL, Blink, V8, ANGLE) cleanly, but they still cannot fully match Chrome in practice: Chrome ships closed-source components on top (Widevine, proprietary codecs, Google Update / Safe Browsing endpoints) that flip detectable JS feature flags and network signals, and forks lag Chrome's release cadence by days to weeks, leaving telltale version-specific behaviours that detectors lock onto.
|
||||
|
||||
**invisible_playwright patches Firefox at the C++ level.** The spoofed values come back out through the normal Gecko paths - there is no JS shim, no override, no `Object.defineProperty`. **From the page's point of view, the browser is just telling the truth.** Anti-bot lie-detectors have nothing to latch onto.
|
||||
|
||||
invisible_playwright spoofs **all the layers that matter, together, coherently** — Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) for the full per-layer breakdown of which C++ files are patched and why.
|
||||
invisible_playwright spoofs **all the layers that matter, together, coherently**: Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) for the full per-layer breakdown of which C++ files are patched and why.
|
||||
|
||||
Everything is driven by preferences - no hardcoded values in the binary. You change one pref, you change the spoofed value.
|
||||
|
||||
|
|
@ -63,23 +33,21 @@ Everything is driven by preferences - no hardcoded values in the binary. You cha
|
|||
|
||||
## How it compares
|
||||
|
||||
Commercial anti-detect browsers (Multilogin Mimic, GoLogin Orbita, AdsPower, Dolphin Anty) ship patched Chromium and apply most spoofing at the JavaScript layer. A few (Kameleo, Multilogin Stealthfox) also offer Firefox-based profiles, but the spoofing pattern is the same: runtime overrides on top of an unmodified rendering engine. That's the ceiling - and it's a low one.
|
||||
**CloakBrowser** ships a similar pitch for Chromium, but its binary is **closed source** (the source-level patches are not published, you only get the compiled output), and it still hits the Chromium reCAPTCHA ceiling. The commercial anti-detect browsers (**Multilogin**, **GoLogin**, AdsPower, Dolphin, Kameleo) are paid SaaS that overlay JS-layer spoofing on a patched Chromium. Managed profiles are nice but raw detection bypass sits below both Camoufox and us.
|
||||
|
||||
| | invisible_playwright | Multilogin / GoLogin | AdsPower / Dolphin | Kameleo |
|
||||
| | invisible_playwright | Camoufox | CloakBrowser | Multilogin |
|
||||
|---|---|---|---|---|
|
||||
| Engine | Firefox (open source) | Chromium fork | Chromium fork | Chromium |
|
||||
| Patch depth | C++ source | JS overrides | JS overrides | JS overrides |
|
||||
| `.toString()` clean | ✅ Native Gecko path | ❌ Detectable shims | ❌ Detectable shims | ❌ Detectable shims |
|
||||
| Canvas / WebGL | ✅ C++ level | ⚠️ JS override | ⚠️ JS override | ⚠️ JS override |
|
||||
| SOCKS5 auth | ✅ Patched | ⚠️ Varies | ⚠️ Varies | ❌ |
|
||||
| Self-hosted | ✅ | ❌ SaaS | ❌ SaaS | ❌ Cloud |
|
||||
| reCAPTCHA v3 score | **0.90** | ~0.3-0.6 | ~0.3-0.5 | ~0.3-0.5 |
|
||||
| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected |
|
||||
| FP Pro - tampering | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected |
|
||||
| FP Pro - VPN flag | ✅ false | ❌ true | ❌ true | ❌ true |
|
||||
| CreepJS lies | ✅ 0 | ❌ multiple | ❌ multiple | ❌ multiple |
|
||||
|
||||
Competitor scores reflect our own testing on Windows 10 against the same five detection suites used above; results may vary with their evolving builds.
|
||||
| Engine | Firefox 150 | Firefox (~1 year old base) | Chromium | Chromium fork |
|
||||
| Patch depth | C++ source | C++ source | C++ source | JS overrides |
|
||||
| Maintenance | Active | Gap (~1 year) | Active | Active SaaS |
|
||||
| Open source | ✅ MIT | ✅ MPL | ❌ Closed source | ❌ Closed source |
|
||||
| `.toString()` clean | ✅ | ✅ | ✅ | ❌ Detectable shims |
|
||||
| Canvas / WebGL / Audio | ✅ C++ | ⚠️ Drift vs current FF | ✅ C++ | ⚠️ JS override |
|
||||
| SOCKS5 auth | ✅ Patched | ❌ | ⚠️ Playwright proxy | ⚠️ Varies |
|
||||
| **reCAPTCHA v3 score** | **0.90** | ~0.3-0.5 | ~0.3-0.5 | ~0.3-0.6 |
|
||||
| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected |
|
||||
| CreepJS lies | ✅ 0 | ❌ Multiple | ✅ 0 | ❌ Multiple |
|
||||
| Cost | Free | Free | Free | From $99/mo |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -172,6 +140,21 @@ with InvisiblePlaywright(proxy=proxy) as browser:
|
|||
|
||||
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
|
||||
|
||||
### Timezone
|
||||
|
||||
The browser timezone follows `timezone=`:
|
||||
|
||||
```python
|
||||
# default: timezone is auto-derived from the egress IP (proxy egress if a
|
||||
# proxy is set, otherwise the host's own public IP)
|
||||
with InvisiblePlaywright(proxy=proxy) as browser:
|
||||
...
|
||||
|
||||
# explicit IANA zone always wins — the only way to force a specific zone
|
||||
with InvisiblePlaywright(proxy=proxy, timezone="America/New_York") as browser:
|
||||
...
|
||||
```
|
||||
|
||||
### Pinning specific fingerprint fields
|
||||
|
||||
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:
|
||||
|
|
@ -215,4 +198,4 @@ invisible_playwright takes a different angle than the major Firefox-hardening pr
|
|||
|
||||
## License
|
||||
|
||||
MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox).
|
||||
MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox).
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ In scope:
|
|||
|
||||
Out of scope here (report to the relevant project):
|
||||
|
||||
- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox/security/advisories/new)
|
||||
- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox/security/advisories/new)
|
||||
- Vulnerabilities in upstream Firefox / mozilla-central — report to Mozilla per https://www.mozilla.org/security/
|
||||
- Vulnerabilities in third-party dependencies (`playwright`, `requests`, etc.) — report to those projects directly
|
||||
|
||||
|
|
|
|||
BIN
docs/screenshots/hero.gif
Normal file
BIN
docs/screenshots/hero.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 479 KiB |
|
|
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||
|
||||
[project]
|
||||
name = "invisible-playwright"
|
||||
version = "0.1.2"
|
||||
version = "0.2.0"
|
||||
description = "Playwright wrapper for a patched Firefox with deterministic stealth profile."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
|
@ -22,13 +22,15 @@ classifiers = [
|
|||
dependencies = [
|
||||
"playwright>=1.40",
|
||||
"platformdirs>=4",
|
||||
"requests>=2.31",
|
||||
"requests[socks]>=2.31",
|
||||
"maxminddb>=2.2",
|
||||
"tzdata>=2024.1",
|
||||
"tqdm>=4.66",
|
||||
"pywin32>=306; sys_platform == 'win32'",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7", "pytest-mock>=3", "responses>=0.24", "build>=1"]
|
||||
dev = ["pytest>=7", "pytest-mock>=3", "responses>=0.24", "build>=1", "pytest-rerunfailures>=14", "playwright>=1.40"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
markers = [
|
||||
|
|
@ -39,6 +41,12 @@ markers = [
|
|||
"linux_only: tests that require Linux platform",
|
||||
]
|
||||
addopts = "-m 'not slow and not e2e'"
|
||||
# tests/playwright-upstream/ is a vendored Microsoft Playwright test suite
|
||||
# used for compatibility verification on demand. It has its own deps
|
||||
# (pixelmatch with API not matching our version) and a conftest that fails
|
||||
# collection in our env. Run it explicitly with --override-ini for compat
|
||||
# audits, not on every push.
|
||||
norecursedirs = ["playwright-upstream"]
|
||||
|
||||
[project.scripts]
|
||||
invisible-playwright = "invisible_playwright.cli:main"
|
||||
|
|
|
|||
172
scripts/ci_drive_gate.py
Normal file
172
scripts/ci_drive_gate.py
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
#!/usr/bin/env python3
|
||||
"""CI drive gate — the firefox-N catcher.
|
||||
|
||||
A raw `firefox --screenshot` proves nothing about automation: a juggler-less
|
||||
binary renders a screenshot just fine and ships broken (firefox-8 did exactly
|
||||
that). This DRIVES the binary the way users will — Playwright launches it over
|
||||
the juggler pipe and exercises real paths.
|
||||
|
||||
Two levels (see `--full`):
|
||||
|
||||
SMOKE (default — run on ALL 5 legs, on every binary's native runner):
|
||||
launch over juggler-pipe → navigate a real http://127.0.0.1 page → assert a
|
||||
response, the Firefox UA, navigator.webdriver falsy, and a DOM read. This is
|
||||
the firefox-8 catcher (a juggler-less binary throws TargetClosedError on
|
||||
launch) plus a base stealth + drivability check. It is intentionally LIGHT:
|
||||
the free hosted runners — windows-latest especially — are content-process
|
||||
unstable under a heavy headless interaction sequence (clicks/moves cascade
|
||||
into "context destroyed" / selector-timeout / eval-CSP), so the gate that
|
||||
must be GREEN on every leg stays minimal and reliable.
|
||||
|
||||
FULL (`--full` — run on the historically-reliable Linux leg):
|
||||
SMOKE plus mouse + keyboard input (firefox-2 / issue #9:
|
||||
jugglerSendMouseEvent/synthesizeMouseEvent), canvas determinism (stealth
|
||||
seed must be per-session), and navigator-surface tells. The interaction code
|
||||
is platform-identical JS (it lives in omni.ja), so exercising it on one
|
||||
reliable leg catches a regression for ALL platforms; win interaction is
|
||||
additionally covered by local pre-release testing.
|
||||
|
||||
NOT covered here: WebGL determinism (needs SWGL, false-fails headless) and the
|
||||
faithful cross-origin iframe test (issue #20) — both live in the local realness
|
||||
gate. All checks here are headless, no screenshot (GPU-free), loopback-only
|
||||
(no external network / proxy / secrets) → safe in public CI.
|
||||
|
||||
Robustness: a real loopback HTTP page (NOT data: / about:blank — those get
|
||||
re-normalized / carry an eval-blocking CSP), arrow-function evaluates (never
|
||||
eval'd), and up to 2 retries on transient context-destroyed/detached/timeout.
|
||||
A genuinely broken binary fails ALL attempts → the gate fails.
|
||||
|
||||
Usage: python ci_drive_gate.py <firefox-binary> [--full]
|
||||
Exit 0 + "DRIVE GATE OK ..." on success; non-zero with a reason on failure.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import http.server
|
||||
import socketserver
|
||||
import sys
|
||||
import threading
|
||||
|
||||
# Bytes of the single page served by the loopback test server. It exposes a
# heading (#x) for the DOM read, a button (#b) and an input (#inp) for the
# interaction checks, and two counters (window.__clicked / window.__moves)
# that the inline script updates from click and mousemove listeners.
HTML = "".join(
    [
        "<!doctype html><html><head><title>dt</title></head><body>",
        "<h1 id=x>hello-drive</h1>",
        "<button id=b>go</button>",
        "<input id=inp>",
        "<script>",
        "window.__clicked=0;window.__moves=0;",
        "document.getElementById('b').addEventListener('click',function(){window.__clicked=1;});",
        "window.addEventListener('mousemove',function(){window.__moves++;});",
        "</script>",
        "</body></html>",
    ]
).encode()
|
||||
|
||||
# JavaScript arrow function (passed as source text to page.evaluate) that
# draws a fixed 16x16 pattern on a fresh 2D canvas and returns its data URL.
# Evaluated twice by the FULL gate to compare the two results.
CANVAS_DRAW = "".join(
    [
        "() => {",
        "const c=document.createElement('canvas');",
        "c.width=c.height=16;",
        "const g=c.getContext('2d');",
        "g.fillStyle='#08f';",
        "g.fillRect(0,0,16,16);",
        "g.fillStyle='#f40';",
        "g.fillText('s',2,12);",
        "return c.toDataURL();",
        "}",
    ]
)
|
||||
|
||||
_TRANSIENT = ("context was destroyed", "frame was detached", "target closed",
|
||||
"because of a navigation", "timeout", "blocked by csp")
|
||||
|
||||
|
||||
class _Handler(http.server.BaseHTTPRequestHandler):
|
||||
def do_GET(self): # noqa: N802
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/html; charset=utf-8")
|
||||
self.send_header("Content-Length", str(len(HTML)))
|
||||
self.end_headers()
|
||||
self.wfile.write(HTML)
|
||||
|
||||
def log_message(self, *a): # silence per-request stderr noise
|
||||
pass
|
||||
|
||||
|
||||
def _start_server():
    """Start the loopback HTTP server on a background daemon thread.

    Binds ``127.0.0.1`` with port ``0`` so the OS picks a free port.

    Returns:
        A ``(server, port)`` tuple: the ``socketserver.TCPServer`` instance
        and the port number it was actually bound to.
    """
    server = socketserver.TCPServer(("127.0.0.1", 0), _Handler)
    worker = threading.Thread(target=server.serve_forever, daemon=True)
    worker.start()
    bound_port = server.server_address[1]
    return server, bound_port
|
||||
|
||||
|
||||
def _drive(exe: str, url: str, full: bool) -> str:
|
||||
"""One full drive attempt. Returns the UA on success; raises on failure."""
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
with sync_playwright() as p:
|
||||
browser = p.firefox.launch(executable_path=exe, headless=True)
|
||||
try:
|
||||
page = browser.new_page()
|
||||
resp = page.goto(url, wait_until="load")
|
||||
assert resp and resp.ok, f"navigation to {url} failed: {resp.status if resp else 'no response'}"
|
||||
ua = page.evaluate("() => navigator.userAgent")
|
||||
webdriver = page.evaluate("() => navigator.webdriver")
|
||||
text = page.evaluate("() => document.getElementById('x').textContent")
|
||||
|
||||
inter = {}
|
||||
if full:
|
||||
# firefox-2 / issue-#9 catcher: real mouse + keyboard over juggler.
|
||||
page.wait_for_selector("#b")
|
||||
page.mouse.move(20, 20)
|
||||
page.mouse.move(120, 90) # synthesizeMouseEvent path
|
||||
page.click("#b") # mousedown/up/click → listener fires
|
||||
page.click("#inp")
|
||||
page.keyboard.type("ok")
|
||||
inter["clicked"] = page.evaluate("() => window.__clicked")
|
||||
inter["moves"] = page.evaluate("() => window.__moves")
|
||||
inter["typed"] = page.evaluate("() => document.getElementById('inp').value")
|
||||
inter["canvas_a"] = page.evaluate(CANVAS_DRAW)
|
||||
inter["canvas_b"] = page.evaluate(CANVAS_DRAW)
|
||||
inter["langs"] = page.evaluate("() => navigator.languages.length")
|
||||
inter["plugins"] = page.evaluate("() => navigator.plugins instanceof PluginArray")
|
||||
finally:
|
||||
browser.close()
|
||||
|
||||
# SMOKE asserts (always).
|
||||
assert "Firefox" in ua, f"unexpected UA (binary not driving correctly): {ua!r}"
|
||||
assert text == "hello-drive", f"DOM/JS roundtrip failed: {text!r}"
|
||||
assert not webdriver, f"navigator.webdriver leaked True (stealth regression): {webdriver!r}"
|
||||
|
||||
if full:
|
||||
assert inter["clicked"] == 1, "page.click() did not fire the click listener — mouse-event synthesis broken (firefox-2 class)"
|
||||
assert inter["moves"] >= 1, "page.mouse.move() produced no mousemove — jugglerSendMouseEvent regression"
|
||||
assert inter["typed"] == "ok", f"page.keyboard.type() failed: {inter['typed']!r}"
|
||||
assert inter["canvas_a"] == inter["canvas_b"], "canvas non-deterministic across identical draws (stealth seed broken → bot tell)"
|
||||
assert inter["langs"] and inter["langs"] > 0, "navigator.languages empty (headless tell)"
|
||||
assert inter["plugins"], "navigator.plugins is not a PluginArray (headless tell)"
|
||||
return ua
|
||||
|
||||
|
||||
def main(exe: str, full: bool) -> int:
    """Run the drive gate against ``exe`` and report the outcome.

    Starts the loopback server, then calls ``_drive`` up to three times. An
    attempt is retried only when its error message (lower-cased) contains one
    of the ``_TRANSIENT`` substrings; any other error, or a transient error on
    the third attempt, ends the loop.

    Args:
        exe: Path of the Firefox binary to drive.
        full: Run the FULL gate instead of the SMOKE gate.

    Returns:
        ``0`` after printing ``DRIVE GATE OK ...`` on success, ``1`` after
        printing ``DRIVE GATE FAILED ...`` to stderr on failure.
    """
    srv, port = _start_server()
    url = f"http://127.0.0.1:{port}/"
    level = "full" if full else "smoke"
    extras = "http+click+mousemove+keyboard+canvas-determinism+navsurface" if full else "http+ua+webdriver+dom"
    last = None
    try:
        for attempt in (1, 2, 3):
            try:
                ua = _drive(exe, url, full)
                if attempt > 1:
                    print(f"(note: drive succeeded on attempt {attempt} after a transient error)")
                print(f"DRIVE GATE OK [{level}] | UA={ua} | {extras}=ok")
                return 0
            except Exception as e:  # noqa: BLE001 — gate: any failure must surface
                last = e
                msg = str(e).lower()
                if attempt < 3 and any(t in msg for t in _TRANSIENT):
                    print(f"(transient error on attempt {attempt}, retrying): {e}", file=sys.stderr)
                    continue
                break
    finally:
        # shutdown() only stops the serve_forever() loop; server_close()
        # releases the listening socket so it is not leaked.
        srv.shutdown()
        srv.server_close()
    print(f"DRIVE GATE FAILED [{level}]: {last}", file=sys.stderr)
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: exactly one positional argument (the binary path) plus
    # the optional --full flag.
    args = sys.argv[1:]
    full = "--full" in args
    positional = [a for a in args if not a.startswith("--")]
    # Reject unrecognised options instead of dropping them: a typo such as
    # `--ful` would otherwise silently run the lighter SMOKE gate and report OK.
    unknown = [a for a in args if a.startswith("--") and a != "--full"]
    if unknown or len(positional) != 1:
        if unknown:
            print(f"unknown option(s): {' '.join(unknown)}", file=sys.stderr)
        print("usage: ci_drive_gate.py <path-to-firefox-binary> [--full]", file=sys.stderr)
        sys.exit(2)
    sys.exit(main(positional[0], full))
|
||||
1
scripts/playwright_pin.txt
Normal file
1
scripts/playwright_pin.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
1.55.0
|
||||
67
scripts/run_e2e.py
Normal file
67
scripts/run_e2e.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Run the FULL e2e suite (every test that opens the browser) against a binary.
|
||||
|
||||
The 127 ``@pytest.mark.e2e`` tests are excluded from the default `pytest` run
|
||||
(`addopts = -m 'not slow and not e2e'`) because they need a real Firefox binary
|
||||
and a display, and they skip themselves when no binary is available. That makes
|
||||
them easy to forget — and "we can't afford for something to not work". This is
|
||||
the gate that runs them all, deliberately, against a chosen binary.
|
||||
|
||||
It is the MANDATORY pre-release e2e gate: run it green against the freshly-built
|
||||
release binary BEFORE un-drafting a firefox-N (alongside the fppro + WebRTC
|
||||
realness gates). It is NOT in the public CI drive-gate — the hosted runners are
|
||||
content-process unstable under a heavy headless interaction sequence (see
|
||||
70-known-bugs / 60-ci-release-pipeline); this runs locally on reliable hardware.
|
||||
|
||||
Flake-resilience: under full-suite load a couple of interaction tests (dblclick,
|
||||
hover/mouseenter) can flake even though they pass 3/3 in isolation, so failures
|
||||
are rerun up to twice on the known transient signatures. A genuinely broken
|
||||
binary fails all attempts. The webrtc e2e tests fake a TCP-only SOCKS proxy locally (no
|
||||
proxy/secrets), so the whole suite is offline.
|
||||
|
||||
Usage:
|
||||
python scripts/run_e2e.py <firefox-binary>
|
||||
python scripts/run_e2e.py # uses $INVPW_BINARY_PATH
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
_RERUN_SIGNATURES = "Timeout|context was destroyed|was detached|not visible|because of a navigation|TargetClosed"
|
||||
|
||||
|
||||
def main() -> int:
    """Run every ``e2e``-marked test against one Firefox binary.

    The binary is the first command-line argument when that argument is not
    an option; otherwise it comes from ``$INVPW_BINARY_PATH``. All remaining
    arguments are forwarded to pytest. Previously a leading option such as
    ``-k`` was mistaken for the binary path, so pytest options could not be
    combined with the environment variable.

    Returns:
        ``2`` when no binary is given or the path does not exist; otherwise
        the exit code of the pytest subprocess.
    """
    cli_args = sys.argv[1:]
    if cli_args and not cli_args[0].startswith("-"):
        binary, pytest_args = cli_args[0], cli_args[1:]
    else:
        # No positional binary: fall back to the environment and forward
        # every argument to pytest untouched.
        binary, pytest_args = os.environ.get("INVPW_BINARY_PATH"), cli_args

    if not binary:
        print("usage: run_e2e.py <firefox-binary> (or set INVPW_BINARY_PATH)", file=sys.stderr)
        return 2
    if not Path(binary).exists():
        print(f"ERROR: binary not found: {binary}", file=sys.stderr)
        return 2

    env = dict(os.environ)
    # One setting drives the whole suite: conftest's firefox_binary fixture and
    # the webrtc e2e both resolve from these.
    env["INVPW_BINARY_PATH"] = binary
    env["STEALTHFOX_E2E_BINARY"] = binary

    repo = Path(__file__).resolve().parent.parent
    cmd = [
        sys.executable, "-m", "pytest",
        "-m", "e2e",
        "-o", "addopts=",  # override the default 'not e2e' deselection
        "--reruns", "2", "--reruns-delay", "1",
        "--only-rerun", _RERUN_SIGNATURES,
        "-p", "no:cacheprovider",
        "-q", "--tb=short",
    ] + pytest_args
    print(f"[run_e2e] binary={binary}")
    print(f"[run_e2e] {' '.join(cmd)}")
    return subprocess.run(cmd, cwd=repo, env=env).returncode
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit the process with main()'s return code.
    exit_code = main()
    sys.exit(exit_code)
|
||||
|
|
@ -15,8 +15,30 @@ Quickstart:
|
|||
page = browser.new_page()
|
||||
page.click("#submit") # expanded into a Bezier trajectory
|
||||
"""
|
||||
from .launcher import InvisiblePlaywright
|
||||
from .config import get_default_args, get_default_stealth_prefs
|
||||
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
||||
from ._geo import GeoTimezoneError, resolve_session_timezone
|
||||
from .download import ensure_binary, ensure_geoip_mmdb
|
||||
from .launcher import InvisiblePlaywright
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__all__ = ["InvisiblePlaywright", "BINARY_VERSION", "FIREFOX_UPSTREAM_VERSION", "__version__"]
|
||||
from importlib.metadata import PackageNotFoundError, version as _pkg_version
|
||||
|
||||
try:
|
||||
__version__ = _pkg_version("invisible-playwright")
|
||||
except PackageNotFoundError:
|
||||
# Editable / source checkout without an install record: fall back to a
|
||||
# marker rather than risk shipping a stale hardcoded string.
|
||||
__version__ = "0.0.0+unknown"
|
||||
|
||||
__all__ = [
|
||||
"InvisiblePlaywright",
|
||||
"ensure_binary",
|
||||
"ensure_geoip_mmdb",
|
||||
"get_default_stealth_prefs",
|
||||
"get_default_args",
|
||||
"resolve_session_timezone",
|
||||
"GeoTimezoneError",
|
||||
"BINARY_VERSION",
|
||||
"FIREFOX_UPSTREAM_VERSION",
|
||||
"__version__",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -84,6 +84,12 @@ _FONT_POOL = _load("font_pool.json")
|
|||
_FONT_CORE: list = _FONT_POOL["core"]
|
||||
_FONT_OPTIONAL: list = _FONT_POOL["optional"]
|
||||
_CPT_FONTS_OPT = _load("cpt_fonts_optional_given_class.json")["table"]
|
||||
# Browsing-history pool + CPT (per-class probabilities for visited sites).
|
||||
# Drives _recaptcha_seed's cookie pre-seed: each persona ends up with a
|
||||
# coherent list of ~15-30 visited sites whose categories correlate with
|
||||
# gpu_class (workstation → dev-heavy, integrated_old → shop+news-heavy).
|
||||
_BROWSING_POOL: list = _load("browsing_pool.json")["entries"]
|
||||
_CPT_BROWSING = _load("cpt_browsing_given_class.json")["table"]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
|
@ -282,6 +288,33 @@ def derive_font_whitelist(gpu_class: str, rng) -> str:
|
|||
return derive_font_prefs(gpu_class, rng)["whitelist"]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# BROWSING HISTORY (Bayesian: per-site P(visited|gpu_class))
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
def derive_browsing_history(gpu_class: str, rng) -> list:
    """Draw the list of sites this persona is treated as having visited.

    Every entry of the browsing pool is tested independently against the
    probability stored for its name in the CPT row of ``gpu_class``. A class
    without a CPT row uses the ``"mid_range"`` row, and a site that is absent
    from the row is given probability 0.3. ``rng.random()`` is drawn once per
    pool entry, in pool order.

    Returns shallow copies of the selected pool entries, e.g.
        [{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, ...]
    ordered by ``name`` so the same seed always yields the same list.
    """
    table = _CPT_BROWSING.get(gpu_class)
    if table is None:
        table = _CPT_BROWSING["mid_range"]
    # dict(site) copies the entry so callers can never mutate the shared pool.
    picked = [
        dict(site)
        for site in _BROWSING_POOL
        if rng.random() < table.get(site["name"], 0.3)
    ]
    return sorted(picked, key=lambda site: site["name"])
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# PUBLIC API: Forge
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
|
@ -350,6 +383,12 @@ class Forge:
|
|||
bundle["gpu_class"], self._rng
|
||||
).items()
|
||||
},
|
||||
# Bayesian browsing history (per-class P(visited|gpu_class)).
|
||||
# Consumed by _recaptcha_seed.py to seed coherent cookie history
|
||||
# when invisible_playwright is launched with prep_recaptcha=True.
|
||||
"browsing_history": derive_browsing_history(
|
||||
bundle["gpu_class"], self._rng
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
64
src/invisible_playwright/_fpforge/data/browsing_pool.json
Normal file
64
src/invisible_playwright/_fpforge/data/browsing_pool.json
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
{
|
||||
"_comment": [
|
||||
"Pool of everyday websites used by the browsing_history node.",
|
||||
"Each entry: { name, category, cookie_profile }.",
|
||||
"- name: bare domain (no scheme, no leading dot).",
|
||||
"- category: dev / shop / news / reference / media / community / misc.",
|
||||
"- cookie_profile: short tag pointing to a cookie-template recipe used by",
|
||||
" _recaptcha_seed.py to generate concrete cookies (so heavy-analytics sites",
|
||||
" get _ga+_gid+OneTrust, simple sites get just _ga, dev tools get GH-style).",
|
||||
"Add new entries here + add per-class probabilities in cpt_browsing_given_class.json."
|
||||
],
|
||||
"entries": [
|
||||
{"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"},
|
||||
{"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"},
|
||||
{"name": "mozilla.org", "category": "reference", "cookie_profile": "ga_consent"},
|
||||
{"name": "w3schools.com", "category": "dev", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "mdn.io", "category": "dev", "cookie_profile": "minimal"},
|
||||
{"name": "duckduckgo.com", "category": "reference", "cookie_profile": "minimal"},
|
||||
{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"},
|
||||
{"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "npmjs.com", "category": "dev", "cookie_profile": "ga_consent"},
|
||||
{"name": "gitlab.com", "category": "dev", "cookie_profile": "ga_cf"},
|
||||
{"name": "pypi.org", "category": "dev", "cookie_profile": "minimal"},
|
||||
{"name": "docs.python.org", "category": "dev", "cookie_profile": "minimal"},
|
||||
{"name": "rust-lang.org", "category": "dev", "cookie_profile": "ga_consent"},
|
||||
{"name": "go.dev", "category": "dev", "cookie_profile": "ga_consent"},
|
||||
{"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "ebay.com", "category": "shop", "cookie_profile": "ga_consent"},
|
||||
{"name": "etsy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "bestbuy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "target.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "nytimes.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "cnn.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "bbc.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "theguardian.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "reuters.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "apnews.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "washingtonpost.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "techcrunch.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "theverge.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "arstechnica.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "wired.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "engadget.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "9to5mac.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||
{"name": "medium.com", "category": "community", "cookie_profile": "ga_consent"},
|
||||
{"name": "dev.to", "category": "community", "cookie_profile": "ga_consent"},
|
||||
{"name": "reddit.com", "category": "community", "cookie_profile": "ga_cf"},
|
||||
{"name": "news.ycombinator.com", "category": "community", "cookie_profile": "minimal"},
|
||||
{"name": "quora.com", "category": "community", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "stackexchange.com", "category": "community", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "imdb.com", "category": "media", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "rottentomatoes.com", "category": "media", "cookie_profile": "ga_consent"},
|
||||
{"name": "metacritic.com", "category": "media", "cookie_profile": "ga_consent"},
|
||||
{"name": "allrecipes.com", "category": "misc", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "epicurious.com", "category": "misc", "cookie_profile": "ga_consent"},
|
||||
{"name": "tripadvisor.com", "category": "misc", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "weather.com", "category": "reference", "cookie_profile": "ga_consent"},
|
||||
{"name": "timeanddate.com", "category": "reference", "cookie_profile": "ga_consent"},
|
||||
{"name": "thesaurus.com", "category": "reference", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "kayak.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "booking.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "airbnb.com", "category": "shop", "cookie_profile": "ga_consent"}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
{
|
||||
"_comment": [
|
||||
"Per-class probability that a persona of a given gpu_class has visited each",
|
||||
"site in the pool. Used by the browsing_history node to derive a coherent",
|
||||
"visited-domain list per persona.",
|
||||
"",
|
||||
"Probabilities are tuned so each class samples ~15-30 sites on average",
|
||||
"(sum across all 50 entries falls in that range), giving an established-user",
|
||||
"look. Categories are biased by class:",
|
||||
" - workstation/high_end: higher P(dev) + high P(news/media)",
|
||||
" - mid_range: balanced",
|
||||
" - low_end/integrated_*: lower P(dev), higher P(shop/news/reference)",
|
||||
"",
|
||||
"Missing class falls back to mid_range via Node CPT pool fallback."
|
||||
],
|
||||
"table": {
|
||||
"workstation": {
|
||||
"youtube.com": 0.80, "wikipedia.org": 0.85, "mozilla.org": 0.70,
|
||||
"w3schools.com": 0.40, "mdn.io": 0.55, "duckduckgo.com": 0.45,
|
||||
"github.com": 0.95, "stackoverflow.com": 0.90, "npmjs.com": 0.65,
|
||||
"gitlab.com": 0.50, "pypi.org": 0.55, "docs.python.org": 0.60,
|
||||
"rust-lang.org": 0.35, "go.dev": 0.30,
|
||||
"amazon.com": 0.70, "ebay.com": 0.25, "etsy.com": 0.15,
|
||||
"bestbuy.com": 0.45, "target.com": 0.30,
|
||||
"nytimes.com": 0.55, "cnn.com": 0.40, "bbc.com": 0.55,
|
||||
"theguardian.com": 0.45, "reuters.com": 0.40, "apnews.com": 0.30,
|
||||
"washingtonpost.com": 0.40,
|
||||
"techcrunch.com": 0.65, "theverge.com": 0.60, "arstechnica.com": 0.65,
|
||||
"wired.com": 0.50, "engadget.com": 0.35, "9to5mac.com": 0.30,
|
||||
"medium.com": 0.55, "dev.to": 0.40, "reddit.com": 0.70,
|
||||
"news.ycombinator.com": 0.65, "quora.com": 0.20, "stackexchange.com": 0.60,
|
||||
"imdb.com": 0.45, "rottentomatoes.com": 0.25, "metacritic.com": 0.20,
|
||||
"allrecipes.com": 0.20, "epicurious.com": 0.15, "tripadvisor.com": 0.30,
|
||||
"weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25,
|
||||
"kayak.com": 0.30, "booking.com": 0.35, "airbnb.com": 0.30
|
||||
},
|
||||
"high_end": {
|
||||
"youtube.com": 0.85, "wikipedia.org": 0.80, "mozilla.org": 0.60,
|
||||
"w3schools.com": 0.45, "mdn.io": 0.45, "duckduckgo.com": 0.40,
|
||||
"github.com": 0.85, "stackoverflow.com": 0.80, "npmjs.com": 0.50,
|
||||
"gitlab.com": 0.40, "pypi.org": 0.45, "docs.python.org": 0.50,
|
||||
"rust-lang.org": 0.30, "go.dev": 0.25,
|
||||
"amazon.com": 0.75, "ebay.com": 0.30, "etsy.com": 0.20,
|
||||
"bestbuy.com": 0.50, "target.com": 0.35,
|
||||
"nytimes.com": 0.50, "cnn.com": 0.50, "bbc.com": 0.50,
|
||||
"theguardian.com": 0.40, "reuters.com": 0.35, "apnews.com": 0.30,
|
||||
"washingtonpost.com": 0.35,
|
||||
"techcrunch.com": 0.60, "theverge.com": 0.65, "arstechnica.com": 0.60,
|
||||
"wired.com": 0.50, "engadget.com": 0.40, "9to5mac.com": 0.35,
|
||||
"medium.com": 0.50, "dev.to": 0.35, "reddit.com": 0.75,
|
||||
"news.ycombinator.com": 0.55, "quora.com": 0.25, "stackexchange.com": 0.55,
|
||||
"imdb.com": 0.55, "rottentomatoes.com": 0.35, "metacritic.com": 0.30,
|
||||
"allrecipes.com": 0.25, "epicurious.com": 0.20, "tripadvisor.com": 0.30,
|
||||
"weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25,
|
||||
"kayak.com": 0.30, "booking.com": 0.40, "airbnb.com": 0.30
|
||||
},
|
||||
"mid_range": {
|
||||
"youtube.com": 0.85, "wikipedia.org": 0.75, "mozilla.org": 0.45,
|
||||
"w3schools.com": 0.40, "mdn.io": 0.30, "duckduckgo.com": 0.35,
|
||||
"github.com": 0.55, "stackoverflow.com": 0.55, "npmjs.com": 0.30,
|
||||
"gitlab.com": 0.25, "pypi.org": 0.25, "docs.python.org": 0.30,
|
||||
"rust-lang.org": 0.15, "go.dev": 0.15,
|
||||
"amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30,
|
||||
"bestbuy.com": 0.55, "target.com": 0.40,
|
||||
"nytimes.com": 0.45, "cnn.com": 0.55, "bbc.com": 0.45,
|
||||
"theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30,
|
||||
"washingtonpost.com": 0.30,
|
||||
"techcrunch.com": 0.45, "theverge.com": 0.50, "arstechnica.com": 0.40,
|
||||
"wired.com": 0.45, "engadget.com": 0.35, "9to5mac.com": 0.30,
|
||||
"medium.com": 0.45, "dev.to": 0.25, "reddit.com": 0.70,
|
||||
"news.ycombinator.com": 0.30, "quora.com": 0.35, "stackexchange.com": 0.40,
|
||||
"imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.35,
|
||||
"allrecipes.com": 0.35, "epicurious.com": 0.25, "tripadvisor.com": 0.40,
|
||||
"weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30,
|
||||
"kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40
|
||||
},
|
||||
"low_end": {
|
||||
"youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.35,
|
||||
"w3schools.com": 0.30, "mdn.io": 0.20, "duckduckgo.com": 0.30,
|
||||
"github.com": 0.30, "stackoverflow.com": 0.30, "npmjs.com": 0.15,
|
||||
"gitlab.com": 0.10, "pypi.org": 0.10, "docs.python.org": 0.15,
|
||||
"rust-lang.org": 0.05, "go.dev": 0.05,
|
||||
"amazon.com": 0.85, "ebay.com": 0.50, "etsy.com": 0.40,
|
||||
"bestbuy.com": 0.55, "target.com": 0.45,
|
||||
"nytimes.com": 0.40, "cnn.com": 0.60, "bbc.com": 0.40,
|
||||
"theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.30,
|
||||
"washingtonpost.com": 0.25,
|
||||
"techcrunch.com": 0.30, "theverge.com": 0.35, "arstechnica.com": 0.25,
|
||||
"wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25,
|
||||
"medium.com": 0.35, "dev.to": 0.15, "reddit.com": 0.65,
|
||||
"news.ycombinator.com": 0.15, "quora.com": 0.45, "stackexchange.com": 0.25,
|
||||
"imdb.com": 0.65, "rottentomatoes.com": 0.45, "metacritic.com": 0.35,
|
||||
"allrecipes.com": 0.45, "epicurious.com": 0.30, "tripadvisor.com": 0.45,
|
||||
"weather.com": 0.65, "timeanddate.com": 0.25, "thesaurus.com": 0.35,
|
||||
"kayak.com": 0.35, "booking.com": 0.50, "airbnb.com": 0.40
|
||||
},
|
||||
"integrated_modern": {
|
||||
"youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.40,
|
||||
"w3schools.com": 0.35, "mdn.io": 0.25, "duckduckgo.com": 0.35,
|
||||
"github.com": 0.40, "stackoverflow.com": 0.40, "npmjs.com": 0.20,
|
||||
"gitlab.com": 0.15, "pypi.org": 0.20, "docs.python.org": 0.20,
|
||||
"rust-lang.org": 0.10, "go.dev": 0.10,
|
||||
"amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30,
|
||||
"bestbuy.com": 0.50, "target.com": 0.40,
|
||||
"nytimes.com": 0.40, "cnn.com": 0.55, "bbc.com": 0.45,
|
||||
"theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30,
|
||||
"washingtonpost.com": 0.30,
|
||||
"techcrunch.com": 0.40, "theverge.com": 0.45, "arstechnica.com": 0.30,
|
||||
"wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25,
|
||||
"medium.com": 0.40, "dev.to": 0.20, "reddit.com": 0.65,
|
||||
"news.ycombinator.com": 0.25, "quora.com": 0.40, "stackexchange.com": 0.35,
|
||||
"imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.30,
|
||||
"allrecipes.com": 0.40, "epicurious.com": 0.25, "tripadvisor.com": 0.40,
|
||||
"weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30,
|
||||
"kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40
|
||||
},
|
||||
"integrated_old": {
|
||||
"youtube.com": 0.75, "wikipedia.org": 0.65, "mozilla.org": 0.30,
|
||||
"w3schools.com": 0.20, "mdn.io": 0.10, "duckduckgo.com": 0.25,
|
||||
"github.com": 0.15, "stackoverflow.com": 0.20, "npmjs.com": 0.05,
|
||||
"gitlab.com": 0.05, "pypi.org": 0.05, "docs.python.org": 0.10,
|
||||
"rust-lang.org": 0.02, "go.dev": 0.02,
|
||||
"amazon.com": 0.85, "ebay.com": 0.55, "etsy.com": 0.45,
|
||||
"bestbuy.com": 0.55, "target.com": 0.50,
|
||||
"nytimes.com": 0.45, "cnn.com": 0.65, "bbc.com": 0.40,
|
||||
"theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.35,
|
||||
"washingtonpost.com": 0.30,
|
||||
"techcrunch.com": 0.20, "theverge.com": 0.25, "arstechnica.com": 0.15,
|
||||
"wired.com": 0.30, "engadget.com": 0.20, "9to5mac.com": 0.20,
|
||||
"medium.com": 0.30, "dev.to": 0.05, "reddit.com": 0.55,
|
||||
"news.ycombinator.com": 0.05, "quora.com": 0.55, "stackexchange.com": 0.15,
|
||||
"imdb.com": 0.70, "rottentomatoes.com": 0.50, "metacritic.com": 0.35,
|
||||
"allrecipes.com": 0.55, "epicurious.com": 0.35, "tripadvisor.com": 0.50,
|
||||
"weather.com": 0.70, "timeanddate.com": 0.30, "thesaurus.com": 0.40,
|
||||
"kayak.com": 0.40, "booking.com": 0.55, "airbnb.com": 0.40
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -120,6 +120,11 @@ class Profile:
|
|||
webgl: WebGLProfile
|
||||
fonts: List[str]
|
||||
dark_theme: bool
|
||||
# Bayesian browsing-history: list of {name, category, cookie_profile}
|
||||
# dicts sampled from data/browsing_pool.json with per-class CPT. Used
|
||||
# by _recaptcha_seed.py to build a coherent cookie pre-seed when the
|
||||
# caller opts in via Stealthfox(prep_recaptcha=True).
|
||||
browsing_history: List[Dict[str, str]] = field(default_factory=list)
|
||||
_raw: Dict[str, Any] = field(default_factory=dict, repr=False, compare=False)
|
||||
|
||||
def to_prefs_dict(self) -> Dict[str, Any]:
|
||||
|
|
@ -255,5 +260,6 @@ def generate_profile(seed: int, pin: Optional[Dict[str, Any]] = None) -> Profile
|
|||
webgl=WebGLProfile(msaa_samples=int(raw["msaa_samples"])),
|
||||
fonts=fonts,
|
||||
dark_theme=bool(raw["dark_theme"]),
|
||||
browsing_history=list(raw.get("browsing_history") or []),
|
||||
_raw=raw,
|
||||
)
|
||||
|
|
|
|||
164
src/invisible_playwright/_geo.py
Normal file
164
src/invisible_playwright/_geo.py
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
"""Resolve the session timezone from the egress IP (``timezone="auto"``).
|
||||
|
||||
Approach B: discover the egress IP with one HTTP request — routed *through the
|
||||
proxy* when one is set, otherwise a direct request that sees the host's own
|
||||
public IP — then map IP → IANA timezone with an offline mmdb
|
||||
(``daijro/geoip-all-in-one``, downloaded + cached by ``download.py``).
|
||||
|
||||
Precedence (see ``resolve_session_timezone``):
|
||||
|
||||
explicit IANA → unchanged explicit always wins
|
||||
"" / "auto" → egress ALWAYS resolve. With a proxy, from the proxy
|
||||
egress IP; without a proxy, from the host's
|
||||
own public IP. This is the default.
|
||||
|
||||
On failure:
|
||||
with a proxy → raise a foreign proxy paired with the host TZ is
|
||||
the precise ``timezone_mismatch`` signal, so
|
||||
we fail loudly rather than fall back silently.
|
||||
without a proxy → "" (host) the host TZ is a safe default, so a transient
|
||||
lookup failure must not break the launch.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class GeoTimezoneError(RuntimeError):
    """Signals that ``timezone="auto"`` could not be resolved to a valid IANA zone."""
|
||||
|
||||
|
||||
# Plain-text IP echo endpoints (each returns just the caller's public IP).
|
||||
_IP_ECHO_ENDPOINTS = (
|
||||
"https://api.ipify.org",
|
||||
"https://icanhazip.com",
|
||||
"https://checkip.amazonaws.com",
|
||||
)
|
||||
|
||||
_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://")
|
||||
|
||||
|
||||
def _proxy_is_set(proxy: Optional[Dict[str, str]]) -> bool:
|
||||
if not proxy:
|
||||
return False
|
||||
server = (proxy.get("server") or "").strip()
|
||||
return bool(server) and server.lower() != "direct://"
|
||||
|
||||
|
||||
def _proxies_for_requests(proxy: Dict[str, str]) -> Dict[str, str]:
|
||||
"""Translate our proxy dict into a ``requests`` proxies mapping.
|
||||
|
||||
SOCKS5 uses the ``socks5h`` scheme so DNS is resolved proxy-side (matches
|
||||
``network.proxy.socks_remote_dns=True`` in the Firefox path). HTTP/HTTPS
|
||||
pass through unchanged. Credentials are URL-encoded.
|
||||
"""
|
||||
server = (proxy.get("server") or "").strip()
|
||||
low = server.lower()
|
||||
if low.startswith("socks5://") or low.startswith("socks://"):
|
||||
scheme = "socks5h"
|
||||
elif low.startswith("socks4://"):
|
||||
scheme = "socks4"
|
||||
elif low.startswith("https://"):
|
||||
scheme = "https"
|
||||
else:
|
||||
scheme = "http"
|
||||
|
||||
host_port = server.split("://", 1)[1] if "://" in server else server
|
||||
user = proxy.get("username") or ""
|
||||
pwd = proxy.get("password") or ""
|
||||
if user:
|
||||
auth = f"{quote(user, safe='')}:{quote(pwd, safe='')}@"
|
||||
else:
|
||||
auth = ""
|
||||
url = f"{scheme}://{auth}{host_port}"
|
||||
return {"http": url, "https": url}
|
||||
|
||||
|
||||
def discover_egress_ip(
    proxy: Optional[Dict[str, str]] = None, *, timeout: float = 10.0
) -> str:
    """Return the public egress IP as reported by a plain-text echo endpoint.

    The request is routed through ``proxy`` when it names a usable server
    (SOCKS support requires ``requests[socks]`` / PySocks). With
    ``proxy=None`` -- or a proxy dict whose ``server`` is blank or
    ``direct://`` -- no proxy mapping is passed, so the lookup sees the
    host's own public IP. Endpoints in ``_IP_ECHO_ENDPOINTS`` are tried in
    order; the first response body that parses as an IP address is returned.

    Args:
        proxy: optional proxy dict (``server`` / ``username`` / ``password``).
        timeout: per-request timeout handed to ``requests.get``.

    Raises:
        GeoTimezoneError: if no endpoint returns a valid IP. The message
            names the kind of lookup that failed (proxied or direct) and
            carries the last underlying error.
    """
    # Use the same "is this really a proxy?" test as the rest of the module,
    # so {"server": "direct://"} is not turned into a bogus proxy URL.
    via_proxy = _proxy_is_set(proxy)
    proxies = _proxies_for_requests(proxy) if via_proxy else None
    last_err: Optional[Exception] = None
    for url in _IP_ECHO_ENDPOINTS:
        try:
            resp = requests.get(url, proxies=proxies, timeout=timeout)
            resp.raise_for_status()
            ip = resp.text.strip()
            ipaddress.ip_address(ip)  # validate (raises ValueError if not an IP)
            return ip
        except Exception as exc:  # noqa: BLE001 - best effort: try the next endpoint
            last_err = exc

    if via_proxy:
        target = "proxy egress IP"
        hint = (
            " For SOCKS proxies make sure requests[socks] / PySocks is installed."
        )
    else:
        # Direct lookup: the SOCKS advice would only mislead here.
        target = "host public IP"
        hint = ""
    raise GeoTimezoneError(
        f"could not discover the {target} via {len(_IP_ECHO_ENDPOINTS)} "
        f"endpoints (last error: {last_err!r}).{hint}"
    )
|
||||
|
||||
|
||||
def ip_to_timezone(ip: str, mmdb_path: Any) -> str:
    """Look up the IANA timezone recorded for ``ip`` in the offline mmdb.

    The zone is read from the record's ``location.time_zone`` field and is
    accepted only if ``zoneinfo.ZoneInfo`` can load it.

    Raises:
        GeoTimezoneError: if the database has no record for ``ip``, the
            record carries no timezone, or the zone name cannot be loaded.
    """
    import maxminddb

    with maxminddb.open_database(str(mmdb_path)) as reader:
        record = reader.get(ip)
    if not record:
        raise GeoTimezoneError(f"egress IP {ip} not present in the geoip database")

    # Tolerate a non-dict record or a missing / null "location" section.
    location = record.get("location") if isinstance(record, dict) else None
    zone = (location or {}).get("time_zone")
    if not zone:
        raise GeoTimezoneError(f"no timezone for egress IP {ip} in the geoip database")

    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    try:
        ZoneInfo(zone)
    except (ZoneInfoNotFoundError, ValueError) as exc:
        raise GeoTimezoneError(
            f"geoip returned an invalid IANA zone {zone!r} for {ip}: {exc}"
        ) from exc
    return zone
|
||||
|
||||
|
||||
def resolve_session_timezone(
    timezone: str, proxy: Optional[Dict[str, str]]
) -> str:
    """Turn the user's ``timezone`` setting into a concrete IANA zone (or ``""``).

    * Any non-blank value other than ``"auto"`` (case-insensitive) is
      returned as-is, stripped of surrounding whitespace.
    * ``""`` / ``"auto"`` trigger a lookup: the egress IP is discovered
      (through the proxy when one is set, directly otherwise) and mapped to
      a zone with the geoip database.

    If the lookup fails behind a proxy the exception is re-raised, so the
    host TZ is never silently paired with a foreign proxy. Without a proxy
    the failure is absorbed and ``""`` (host TZ) is returned so the launch
    still proceeds.
    """
    requested = (timezone or "").strip()
    if requested and requested.lower() != "auto":
        return requested  # explicit value always wins

    # Imported lazily, only on the auto-resolve path.
    from .download import ensure_geoip_mmdb

    behind_proxy = _proxy_is_set(proxy)
    try:
        egress_ip = discover_egress_ip(proxy if behind_proxy else None)
        return ip_to_timezone(egress_ip, ensure_geoip_mmdb())
    except Exception:
        if not behind_proxy:
            return ""  # no proxy: host TZ is a safe fallback
        raise  # fail early behind a proxy (timezone_mismatch trap)
|
||||
340
src/invisible_playwright/_recaptcha_seed.py
Normal file
340
src/invisible_playwright/_recaptcha_seed.py
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
"""Deterministic reCAPTCHA cookie pre-seed.
|
||||
|
||||
Consumes the Bayesian-sampled `browsing_history` from the persona Profile
|
||||
(see `_fpforge/_sampler.py:derive_browsing_history`). For each visited
|
||||
site, builds 1-5 realistic cookies whose composition is chosen by the
|
||||
site's `cookie_profile` tag (analytics-only / consent / cloudflare-bot-
|
||||
management / etc.). All values seeded deterministically from the persona
|
||||
seed, so a given persona always presents the SAME cookies across sessions.
|
||||
|
||||
In addition, always seeds 5 cookies on .google.com (NID, CONSENT, SOCS,
|
||||
_GRECAPTCHA, ENID). Excludes 1P_JAR which was deprecated by Google in 2022
|
||||
— including it now is an anachronism flag.
|
||||
|
||||
Public API:
|
||||
await seed_recaptcha_cookies_async(context, profile, timezone=None)
|
||||
seed_recaptcha_cookies_sync(context, profile, timezone=None)
|
||||
|
||||
`profile` is an `_fpforge.Profile`; `timezone` is the IANA tz (e.g.
|
||||
"Europe/Rome") used to derive the CONSENT cookie's language token, so a
|
||||
European-tz persona gets CONSENT in their language not en+FX.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import random
|
||||
import time
|
||||
from typing import Any, List, Optional
|
||||
|
||||
# URL-safe base64 alphabet (no padding chars).
|
||||
_B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
|
||||
_HEX_ALPHABET = "0123456789abcdef"
|
||||
|
||||
|
||||
def _sub_seed(seed: int, tag: str) -> int:
|
||||
"""FNV-1a mix → independent PRNG streams per logical bucket from one seed."""
|
||||
h = 0xcbf29ce484222325 ^ (seed & 0xFFFFFFFF)
|
||||
for c in tag.encode("ascii"):
|
||||
h ^= c
|
||||
h = (h * 0x100000001b3) & 0xFFFFFFFFFFFFFFFF
|
||||
return h or 0xdeadbeef
|
||||
|
||||
|
||||
def _b64_rand(rng: random.Random, length: int) -> str:
    """Return ``length`` characters, each picked with ``rng.choice`` from ``_B64_ALPHABET``."""
    picked = []
    for _ in range(length):
        # One rng.choice() per character keeps the draw sequence stable.
        picked.append(rng.choice(_B64_ALPHABET))
    return "".join(picked)
|
||||
|
||||
|
||||
def _hex_rand(rng: random.Random, length: int) -> str:
    """Return ``length`` characters, each picked with ``rng.choice`` from ``_HEX_ALPHABET``."""
    picked = []
    for _ in range(length):
        # One rng.choice() per character keeps the draw sequence stable.
        picked.append(rng.choice(_HEX_ALPHABET))
    return "".join(picked)
|
||||
|
||||
|
||||
def _yyyymmdd_utc(ts: int) -> str:
|
||||
return datetime.datetime.utcfromtimestamp(ts).strftime("%Y%m%d")
|
||||
|
||||
|
||||
# IANA timezone -> (country_code, lang) for CONSENT cookie coherence.
|
||||
# Real EU users get CONSENT with `<lang>+<COUNTRY>+NNN`; the non-EU zones listed here keep the `FX` region token but use their own language (e.g. `ja+FX+NNN`).
|
||||
# Default fallback `en+FX+NNN` for any tz not in this map.
|
||||
_TZ_TO_REGION = {
|
||||
"Europe/Rome": ("IT", "it"),
|
||||
"Europe/Berlin": ("DE", "de"),
|
||||
"Europe/Paris": ("FR", "fr"),
|
||||
"Europe/Madrid": ("ES", "es"),
|
||||
"Europe/London": ("GB", "en"),
|
||||
"Europe/Amsterdam": ("NL", "nl"),
|
||||
"Europe/Brussels": ("BE", "fr"),
|
||||
"Europe/Vienna": ("AT", "de"),
|
||||
"Europe/Zurich": ("CH", "de"),
|
||||
"Europe/Dublin": ("IE", "en"),
|
||||
"Europe/Lisbon": ("PT", "pt"),
|
||||
"Europe/Stockholm": ("SE", "sv"),
|
||||
"Europe/Oslo": ("NO", "no"),
|
||||
"Europe/Copenhagen": ("DK", "da"),
|
||||
"Europe/Helsinki": ("FI", "fi"),
|
||||
"Europe/Warsaw": ("PL", "pl"),
|
||||
"Europe/Prague": ("CZ", "cs"),
|
||||
"Europe/Athens": ("GR", "el"),
|
||||
"Asia/Tokyo": ("FX", "ja"),
|
||||
"Asia/Shanghai": ("FX", "zh"),
|
||||
"Asia/Hong_Kong": ("FX", "zh"),
|
||||
"Asia/Seoul": ("FX", "ko"),
|
||||
}
|
||||
|
||||
|
||||
def _consent_region_lang(timezone: Optional[str]) -> tuple:
    """Return the ``(region_token, lang)`` pair used in the CONSENT cookie.

    The pair is looked up in ``_TZ_TO_REGION`` by IANA zone name. A missing,
    empty or unmapped ``timezone`` yields the default ``("FX", "en")``.
    """
    fallback = ("FX", "en")
    if not timezone:
        return fallback
    return _TZ_TO_REGION.get(timezone, fallback)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# .google.com cookie batch (always present, regardless of browsing history)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _google_cookies(rng: random.Random, now: int,
                    timezone: Optional[str] = None) -> List[dict]:
    """Build the five ``.google.com`` cookies: NID, CONSENT, SOCS, _GRECAPTCHA, ENID.

    Args:
        rng: source of every random value below. The draws happen in a fixed
            order (consent age first, then each cookie top to bottom), so
            reordering anything here changes the generated values.
        now: Unix timestamp in seconds; expiries and the CONSENT date are
            computed relative to it.
        timezone: IANA zone passed to ``_consent_region_lang`` to pick the
            language and region tokens embedded in CONSENT.

    Returns:
        A list of cookie dicts with ``name``, ``value``, ``domain``, ``path``,
        ``expires``, ``secure`` and ``sameSite`` keys; NID and ENID also set
        ``httpOnly``.
    """
    # Consent was "given" between 60 and 720 days before `now`.
    consent_age = rng.randint(60, 720) * 86400
    region, lang = _consent_region_lang(timezone)
    # NID 3-digit prefix range broadened to 100-540 to cover historical NID
    # versions (137, 105, 511, 525 etc. observed in real captures).
    return [
        {"name": "NID",
         "value": f"{rng.randint(100, 540)}={_b64_rand(rng, 178)}",
         "domain": ".google.com", "path": "/",
         "expires": now + 180 * 86400,
         "httpOnly": True, "secure": True, "sameSite": "None"},
        {"name": "CONSENT",
         "value": f"YES+cb.{_yyyymmdd_utc(now - consent_age)}-"
                  f"{rng.randint(10, 19):02d}-p{rng.randint(0, 9)}."
                  f"{lang}+{region}+{rng.randint(100, 999)}",
         "domain": ".google.com", "path": "/",
         "expires": now + 395 * 86400,
         "secure": True, "sameSite": "Lax"},
        # 1P_JAR removed: Google deprecated it in 2022. Including it now is
        # an anachronism flag for fingerprinters that look at cookie freshness.
        {"name": "SOCS",
         "value": f"CAES{_b64_rand(rng, 56)}",
         "domain": ".google.com", "path": "/",
         "expires": now + 395 * 86400,
         "secure": True, "sameSite": "Lax"},
        {"name": "_GRECAPTCHA",
         "value": _b64_rand(rng, 124),
         "domain": ".google.com", "path": "/",
         "expires": now + 180 * 86400,
         "secure": True, "sameSite": "None"},
        {"name": "ENID",
         "value": _b64_rand(rng, 252),
         "domain": ".google.com", "path": "/",
         "expires": now + 395 * 86400,
         "httpOnly": True, "secure": True, "sameSite": "Lax"},
    ]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-site cookie generators (recipes keyed by site["cookie_profile"])
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _norm_domain(domain: str) -> str:
|
||||
return domain if domain.startswith(".") else "." + domain
|
||||
|
||||
|
||||
def _ga_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||
first_age = rng.randint(7, 395) * 86400
|
||||
return {"name": "_ga",
|
||||
"value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - first_age}",
|
||||
"domain": domain, "path": "/",
|
||||
"expires": now + 395 * 86400,
|
||||
"secure": True, "sameSite": "Lax"}
|
||||
|
||||
|
||||
def _gid_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||
return {"name": "_gid",
|
||||
"value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - rng.randint(60, 86400)}",
|
||||
"domain": domain, "path": "/",
|
||||
"expires": now + 86400,
|
||||
"secure": True, "sameSite": "Lax"}
|
||||
|
||||
|
||||
def _cf_bm_cookie(rng: random.Random, now: int, domain: str) -> dict:
    """Build a ``__cf_bm`` cookie: ``<43 base64 chars>.<timestamp>-1-1-1-1``.

    The timestamp is drawn uniformly from ``1700000000`` to ``now``
    (inclusive); the cookie expires 30 minutes after ``now``. RNG draws: the
    token first, then the timestamp.
    """
    token = _b64_rand(rng, 43)
    issued = rng.randint(1700000000, now)
    return {
        "name": "__cf_bm",
        "value": f"{token}.{issued}-1-1-1-1",
        "domain": domain,
        "path": "/",
        "expires": now + 1800,
        "secure": True,
        "sameSite": "None",
    }
|
||||
|
||||
|
||||
def _onetrust_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||
age_d = rng.randint(7, 365)
|
||||
iso = datetime.datetime.utcfromtimestamp(now - age_d * 86400).strftime(
|
||||
"%Y-%m-%dT%H:%M:%S.000Z"
|
||||
)
|
||||
return {"name": "OptanonAlertBoxClosed",
|
||||
"value": iso,
|
||||
"domain": domain, "path": "/",
|
||||
"expires": now + 395 * 86400,
|
||||
"secure": True, "sameSite": "Lax"}
|
||||
|
||||
|
||||
def _cookieyes_cookie(rng: random.Random, now: int, domain: str) -> dict:
    """Build a ``cookieyes-consent`` cookie with every category accepted.

    The consent id is 28 random base64 characters; the cookie expires 395
    days after ``now``.
    """
    consent_id = _b64_rand(rng, 28)
    return {
        "name": "cookieyes-consent",
        "value": f"consentid:{consent_id},consent:yes,action:yes,necessary:yes,functional:yes,analytics:yes",
        "domain": domain,
        "path": "/",
        "expires": now + 395 * 86400,
        "secure": True,
        "sameSite": "Lax",
    }
|
||||
|
||||
|
||||
def _clarity_cookie(rng: random.Random, now: int, domain: str) -> dict:
    """Build a ``_clck`` cookie dict for ``domain``.

    The value is ``<8 chars from _hex_rand>|2|f<two digits>|0|<unix time>``
    with the timestamp 60 to 180 days before ``now``; the cookie expires 365
    days after ``now``.
    """
    # Keep the three draws in this order so seeded output stays stable.
    user_part = _hex_rand(rng, 8)
    flag_part = rng.randint(10, 99)
    seen_at = now - rng.randint(60, 180) * 86400
    return {
        "name": "_clck",
        "value": f"{user_part}|2|f{flag_part}|0|{seen_at}",
        "domain": domain,
        "path": "/",
        "expires": now + 365 * 86400,
        "secure": True,
        "sameSite": "Lax",
    }
|
||||
|
||||
|
||||
def _fbp_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||
"""Facebook Pixel _fbp = fb.<subdomain_index>.<unix_ms>.<random_int>"""
|
||||
return {"name": "_fbp",
|
||||
"value": f"fb.1.{(now - rng.randint(60, 30*86400)) * 1000}."
|
||||
f"{rng.randint(100000000, 9999999999)}",
|
||||
"domain": domain, "path": "/",
|
||||
"expires": now + 90 * 86400,
|
||||
"secure": True, "sameSite": "Lax"}
|
||||
|
||||
|
||||
def _gtm_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||
"""_dc_gtm_<container_id>=1 — Google Tag Manager throttle flag."""
|
||||
container = f"UA-{rng.randint(10000000, 99999999)}-{rng.randint(1, 9)}"
|
||||
return {"name": f"_dc_gtm_{container}",
|
||||
"value": "1",
|
||||
"domain": domain, "path": "/",
|
||||
"expires": now + 60,
|
||||
"secure": True, "sameSite": "Lax"}
|
||||
|
||||
|
||||
def _hssrc_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||
"""HubSpot referrer flag — small int."""
|
||||
return {"name": "__hssrc",
|
||||
"value": str(rng.randint(1, 5)),
|
||||
"domain": domain, "path": "/",
|
||||
"expires": now + 1800,
|
||||
"secure": True, "sameSite": "Lax"}
|
||||
|
||||
|
||||
def _cookies_for_profile(profile: str, rng: random.Random,
                         now: int, domain: str) -> List[dict]:
    """Turn a ``cookie_profile`` tag into a concrete list of cookie dicts.

    Recognised tags are ``minimal``, ``ga_only``, ``ga_cf``, ``ga_consent`` and
    ``ga_consent_clarity``; any other tag gets the same single ``_ga`` cookie
    as ``minimal``. Optional cookies and the consent-banner flavour (OneTrust
    vs CookieYes) are decided by draws from ``rng``, so the result is
    deterministic for a given rng state. ``domain`` is normalised to a
    leading-dot form before use.
    """
    domain = _norm_domain(domain)

    def bake(factory) -> dict:
        # Every cookie factory shares the (rng, now, domain) signature.
        return factory(rng, now, domain)

    def consent_banner() -> dict:
        # Flip the coin first, then build only the chosen banner cookie.
        chosen = _onetrust_cookie if rng.random() < 0.5 else _cookieyes_cookie
        return bake(chosen)

    def sometimes(chance: float, factory, jar: List[dict]) -> None:
        # One rng draw per optional cookie, whether or not it is added.
        if rng.random() < chance:
            jar.append(bake(factory))

    if profile == "ga_only":
        jar = [bake(_ga_cookie), bake(_gid_cookie)]
        # 30% chance of GTM helper paired with GA
        sometimes(0.3, _gtm_cookie, jar)
        return jar

    if profile == "ga_cf":
        return [bake(_ga_cookie), bake(_cf_bm_cookie)]

    if profile == "ga_consent":
        jar = [bake(_ga_cookie), bake(_gid_cookie)]
        jar.append(consent_banner())
        sometimes(0.4, _gtm_cookie, jar)
        return jar

    if profile == "ga_consent_clarity":
        # Heavy-tracking site profile: GA + Clarity + consent + often FB pixel
        jar = [bake(_ga_cookie), bake(_gid_cookie), bake(_clarity_cookie)]
        jar.append(consent_banner())
        sometimes(0.5, _fbp_cookie, jar)
        sometimes(0.4, _gtm_cookie, jar)
        sometimes(0.25, _hssrc_cookie, jar)
        return jar

    # "minimal" and any unknown profile share the safe single-cookie result.
    return [bake(_ga_cookie)]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_cookies(seed: int,
                  browsing_history: Optional[List[dict]] = None,
                  now: Optional[int] = None,
                  timezone: Optional[str] = None) -> List[dict]:
    """Assemble the complete cookie list for one persona.

    Args:
        seed: persona integer seed (from `Profile.seed`).
        browsing_history: list of {name, category, cookie_profile} dicts as
            sampled by `_fpforge.derive_browsing_history`. ``None`` is treated
            as an empty list, so only the google cookies are returned.
        now: unix-seconds timestamp; defaults to the current time. Pin it in
            tests for reproducible output.
        timezone: IANA tz forwarded to `_google_cookies`, which uses it to
            derive the CONSENT cookie's `lang+region` token.

    Returns:
        The google cookies followed by the per-site cookies, in history order.
    """
    persona_seed = int(seed)
    ts = int(time.time()) if now is None else now

    # .google.com cookies are always present; CONSENT lang is derived from tz.
    google_rng = random.Random(_sub_seed(persona_seed, "google"))
    cookies: List[dict] = list(_google_cookies(google_rng, ts, timezone=timezone))

    # Each site gets its own rng stream keyed by seed x domain name.
    for site in browsing_history or []:
        site_name = site["name"]
        site_rng = random.Random(_sub_seed(persona_seed, f"dom:{site_name}"))
        profile_tag = site.get("cookie_profile", "minimal")
        cookies.extend(_cookies_for_profile(profile_tag, site_rng, ts, site_name))
    return cookies
|
||||
|
||||
|
||||
def _extract_seed_and_history(profile: Any) -> tuple:
|
||||
"""Accept a Profile object OR a (seed, history) tuple OR just an int seed."""
|
||||
if isinstance(profile, int):
|
||||
return int(profile), []
|
||||
seed = int(getattr(profile, "seed"))
|
||||
history = list(getattr(profile, "browsing_history", []) or [])
|
||||
return seed, history
|
||||
|
||||
|
||||
async def seed_recaptcha_cookies_async(context: Any, profile: Any,
                                       timezone: Optional[str] = None) -> None:
    """Async variant: add the persona's deterministic cookies to ``context``.

    ``profile`` is resolved through `_extract_seed_and_history`; ``timezone``
    is forwarded to `build_cookies`. Any exception raised by
    ``context.add_cookies`` is ignored.
    """
    persona_seed, visited = _extract_seed_and_history(profile)
    jar = build_cookies(persona_seed, visited, timezone=timezone)
    try:
        await context.add_cookies(jar)
    except Exception:
        # Best-effort: failures from add_cookies are deliberately ignored.
        return
|
||||
|
||||
|
||||
def seed_recaptcha_cookies_sync(context: Any, profile: Any,
                                timezone: Optional[str] = None) -> None:
    """Sync variant: add the persona's deterministic cookies to ``context``.

    ``profile`` is resolved through `_extract_seed_and_history`; ``timezone``
    is forwarded to `build_cookies`. Any exception raised by
    ``context.add_cookies`` is ignored.
    """
    persona_seed, visited = _extract_seed_and_history(profile)
    jar = build_cookies(persona_seed, visited, timezone=timezone)
    try:
        context.add_cookies(jar)
    except Exception:
        # Best-effort: failures from add_cookies are deliberately ignored.
        return
|
||||
|
||||
|
||||
__all__ = [
|
||||
"build_cookies",
|
||||
"seed_recaptcha_cookies_async",
|
||||
"seed_recaptcha_cookies_sync",
|
||||
]
|
||||
|
|
@ -3,11 +3,13 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
from playwright.async_api import Browser, Playwright, async_playwright
|
||||
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
||||
|
||||
from ._fpforge import Profile, generate_profile
|
||||
from ._geo import resolve_session_timezone
|
||||
from ._headless import make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from .download import ensure_binary
|
||||
|
|
@ -49,6 +51,8 @@ class InvisiblePlaywright:
|
|||
timezone: str = "",
|
||||
extra_prefs: Optional[Dict[str, Any]] = None,
|
||||
binary_path: Optional[str] = None,
|
||||
profile_dir: Optional[Union[str, Path]] = None,
|
||||
prep_recaptcha: bool = False,
|
||||
) -> None:
|
||||
# See sync launcher: `zoom.stealth.fpp.hw_seed` is int32_t — clamp.
|
||||
self.seed: int = int(seed) if seed is not None else secrets.randbits(31)
|
||||
|
|
@ -61,13 +65,24 @@ class InvisiblePlaywright:
|
|||
self._timezone = timezone
|
||||
self._extra_prefs = extra_prefs
|
||||
self._binary_path = binary_path
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
# reCAPTCHA pre-seed gated server-side; respect persistent profile.
|
||||
self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None
|
||||
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
||||
self._pw: Optional[Playwright] = None
|
||||
self._browser: Optional[Browser] = None
|
||||
self._persistent_context: Optional[BrowserContext] = None
|
||||
self._virtual_display: Any = None
|
||||
|
||||
async def __aenter__(self) -> Browser:
|
||||
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
||||
import sys as _sys
|
||||
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||
# concrete IANA zone before anything reads self._timezone. Run the
|
||||
# blocking geo lookup off the event loop. Fail-early if a proxy is set
|
||||
# but the egress zone can't be resolved.
|
||||
self._timezone = await asyncio.to_thread(
|
||||
resolve_session_timezone, self._timezone, self._proxy
|
||||
)
|
||||
executable = self._binary_path or ensure_binary()
|
||||
prefs = translate_profile_to_prefs(
|
||||
self._profile,
|
||||
|
|
@ -85,6 +100,24 @@ class InvisiblePlaywright:
|
|||
env = self._build_env()
|
||||
try:
|
||||
self._pw = await async_playwright().start()
|
||||
if self._profile_dir is not None:
|
||||
# See sync launcher for the persistent-context rationale.
|
||||
self._profile_dir.mkdir(parents=True, exist_ok=True)
|
||||
# firefox-5 ships the C++ overrideTimezone IDL method (C7
|
||||
# closure), so locale + timezone_id now propagate cleanly
|
||||
# to the persistent context without hanging the launch.
|
||||
self._persistent_context = await self._pw.firefox.launch_persistent_context(
|
||||
user_data_dir=str(self._profile_dir),
|
||||
executable_path=str(executable),
|
||||
headless=pw_headless,
|
||||
firefox_user_prefs=prefs,
|
||||
proxy=playwright_proxy,
|
||||
args=self._extra_args,
|
||||
env=env,
|
||||
**self._default_context_kwargs(),
|
||||
)
|
||||
_patch_new_page_sleep(self._persistent_context)
|
||||
return self._persistent_context
|
||||
self._browser = await self._pw.firefox.launch(
|
||||
executable_path=str(executable),
|
||||
headless=pw_headless,
|
||||
|
|
@ -102,12 +135,18 @@ class InvisiblePlaywright:
|
|||
def _patch_new_context_defaults(self, browser: Browser) -> None:
|
||||
original = browser.new_context
|
||||
defaults = self._default_context_kwargs()
|
||||
prep = self._prep_recaptcha
|
||||
profile = self._profile # pass the whole Profile (seed + browsing_history)
|
||||
tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region
|
||||
|
||||
async def patched(**kw):
|
||||
merged = dict(defaults)
|
||||
merged.update(kw)
|
||||
ctx = await original(**merged)
|
||||
_patch_new_page_sleep(ctx)
|
||||
if prep:
|
||||
from ._recaptcha_seed import seed_recaptcha_cookies_async
|
||||
await seed_recaptcha_cookies_async(ctx, profile, timezone=tz)
|
||||
return ctx
|
||||
|
||||
browser.new_context = patched # type: ignore[assignment]
|
||||
|
|
@ -134,6 +173,12 @@ class InvisiblePlaywright:
|
|||
await self._teardown()
|
||||
|
||||
async def _teardown(self) -> None:
|
||||
if self._persistent_context is not None:
|
||||
try:
|
||||
await self._persistent_context.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._persistent_context = None
|
||||
if self._browser is not None:
|
||||
try:
|
||||
await self._browser.close()
|
||||
|
|
|
|||
|
|
@ -10,7 +10,15 @@ from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
|||
from .download import cache_root, ensure_binary
|
||||
|
||||
|
||||
def _cmd_fetch(_args: argparse.Namespace) -> int:
|
||||
def _cmd_fetch(args: argparse.Namespace) -> int:
|
||||
# --force: re-download even if already cached (drop the cached version dir,
|
||||
# then let ensure_binary fetch it fresh). Useful to recover a corrupted cache
|
||||
# or re-pull after a re-published release.
|
||||
if getattr(args, "force", False):
|
||||
from .download import cache_dir_for_version
|
||||
d = cache_dir_for_version()
|
||||
if d.exists():
|
||||
shutil.rmtree(d, ignore_errors=True)
|
||||
path = ensure_binary()
|
||||
print(path)
|
||||
return 0
|
||||
|
|
@ -44,9 +52,17 @@ def _cmd_clear_cache(_args: argparse.Namespace) -> int:
|
|||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
p = argparse.ArgumentParser(prog="invisible-playwright", description="invisible_playwright CLI")
|
||||
sub = p.add_subparsers(dest="cmd", required=True)
|
||||
# Top-level `--version` / `-V` flag so `python -m invisible_playwright --version`
|
||||
# works (Python convention), in addition to the existing `version` subcommand.
|
||||
p.add_argument(
|
||||
"-V", "--version", action="version",
|
||||
version=f"invisible_playwright {__version__} (BINARY_VERSION={BINARY_VERSION}, Firefox {FIREFOX_UPSTREAM_VERSION})",
|
||||
)
|
||||
sub = p.add_subparsers(dest="cmd")
|
||||
|
||||
sub.add_parser("fetch", help="download the patched Firefox binary")
|
||||
fetch_p = sub.add_parser("fetch", help="download the patched Firefox binary")
|
||||
fetch_p.add_argument("--force", action="store_true",
|
||||
help="re-download even if already cached")
|
||||
sub.add_parser("path", help="print the absolute path to the cached binary")
|
||||
sub.add_parser("version", help="print wrapper and binary versions")
|
||||
sub.add_parser("clear-cache", help="remove all cached binaries")
|
||||
|
|
@ -54,7 +70,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
args = build_parser().parse_args(argv)
|
||||
parser = build_parser()
|
||||
args = parser.parse_args(argv)
|
||||
if args.cmd is None:
|
||||
# argparse-conventional: print usage + error message to stderr, exit 2.
|
||||
# We can't keep `required=True` on the subparsers because that breaks
|
||||
# the top-level `--version` flag (argparse demands a subcommand even
|
||||
# when --version is the only token). parser.error() preserves the
|
||||
# original "no subcommand" exit semantics tests expect.
|
||||
parser.error("a subcommand is required (try --help, --version, or one of: fetch, path, version, clear-cache)")
|
||||
dispatch = {
|
||||
"fetch": _cmd_fetch,
|
||||
"path": _cmd_path,
|
||||
|
|
|
|||
110
src/invisible_playwright/config.py
Normal file
110
src/invisible_playwright/config.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
"""Public helpers for building Firefox launch config without using ``InvisiblePlaywright``.
|
||||
|
||||
Use these when you need to call ``playwright.firefox.launch()`` (or
|
||||
``firefox.launch_persistent_context()``) directly with our patched binary
|
||||
and stealth prefs, instead of using the ``InvisiblePlaywright`` context
|
||||
manager.
|
||||
|
||||
Typical caller is an external integration that owns its own browser
|
||||
lifecycle (a Crawlee/Skyvern/changedetection-style fetcher, a Playwright
|
||||
Server wrapper, a multi-language harness) and just wants the building
|
||||
blocks::
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
from invisible_playwright import ensure_binary, get_default_stealth_prefs
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await p.firefox.launch(
|
||||
executable_path=str(ensure_binary()),
|
||||
firefox_user_prefs=get_default_stealth_prefs(seed=42),
|
||||
)
|
||||
|
||||
For everyday Python usage the ``InvisiblePlaywright`` context manager is
|
||||
still the recommended entry point; these helpers expose the same internals
|
||||
without the lifecycle ownership.
|
||||
|
||||
.. note::
|
||||
When calling ``firefox.launch()`` yourself, pass ``headless=False`` and
|
||||
manage the display hiding (Xvfb on Linux, hidden desktop on Windows)
|
||||
externally. Passing ``headless=True`` directly to Playwright puts
|
||||
Firefox in true headless mode, which skips the real rendering pipeline
|
||||
and breaks canvas / audio / WebGL fingerprint coherence. The
|
||||
``InvisiblePlaywright`` context manager does this translation
|
||||
automatically; the public helpers leave it to the caller.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from ._fpforge import generate_profile
|
||||
from .prefs import translate_profile_to_prefs
|
||||
|
||||
|
||||
def get_default_stealth_prefs(
    seed: Optional[int] = None,
    *,
    pin: Optional[Dict[str, Any]] = None,
    locale: str = "en-US",
    timezone: str = "",
    extra_prefs: Optional[Dict[str, Any]] = None,
    humanize: Union[bool, float] = True,
    virtual_display: bool = False,
) -> Dict[str, Any]:
    """Build a complete ``firefox_user_prefs`` dict for ``firefox.launch()``.

    These are the same prefs that ``InvisiblePlaywright(seed=..., locale=...,
    timezone=..., extra_prefs=..., humanize=...)`` would inject; use this
    helper when you drive ``playwright.firefox.launch()`` yourself.

    Args:
        seed: Integer seed for the Bayesian fingerprint sampler. The same
            seed produces the same fingerprint. ``None`` generates a fresh
            random int31 (matches the ``InvisiblePlaywright`` default).
        pin: Optional dict forcing specific fingerprint fields while the
            rest stays seed-derived. See ``docs/pinning.md``.
        locale: BCP-47 tag (e.g. ``"en-US"``). Drives ``Accept-Language``
            and ``navigator.language``.
        timezone: IANA timezone (e.g. ``"America/New_York"``). Empty means
            use the host TZ. This pure pref builder does NOT resolve
            ``"auto"`` (that needs the proxy + a network lookup at launch
            time) — pass a concrete zone here, or use ``InvisiblePlaywright``
            / ``resolve_session_timezone(timezone, proxy)`` for ``"auto"``.
        extra_prefs: Optional dict overlaid LAST onto the generated prefs.
        humanize: When True (default), every mouse move is expanded into
            a Bezier trajectory by the patched Juggler. A float caps the
            motion in seconds. False disables the behavior.
        virtual_display: When True on Windows, apply GPU-disabling prefs
            to prevent GPU process crashes on virtual desktops without
            D3D11 backend.

    Returns:
        Dict ready to pass as ``firefox_user_prefs=`` to
        ``playwright.firefox.launch()`` or ``launch_persistent_context()``.
    """
    if seed is None:
        resolved_seed = secrets.randbits(31)
    else:
        resolved_seed = int(seed)

    prefs = translate_profile_to_prefs(
        generate_profile(resolved_seed, pin=pin),
        locale=locale,
        timezone=timezone,
        extra_prefs=extra_prefs,
        virtual_display=virtual_display,
    )

    prefs["invisible_playwright.humanize"] = bool(humanize)
    if not humanize:
        return prefs

    # A bare ``True`` means "use the default cap"; a number is the cap itself.
    cap_seconds = 1.5 if isinstance(humanize, bool) else float(humanize)
    prefs["invisible_playwright.humanize.maxTime"] = str(cap_seconds)
    return prefs
|
||||
|
||||
|
||||
def get_default_args() -> List[str]:
    """Return the default Firefox CLI args to pass via ``args=``.

    The list is currently empty because the stealth configuration travels in
    ``firefox_user_prefs`` rather than CLI flags. The function exists for
    parity with the ``cloakbrowser.config.get_default_stealth_args`` pattern
    and so integrations can already write ``args=[*existing,
    *get_default_args()]``.
    """
    default_args: List[str] = list()
    return default_args
|
||||
|
|
@ -7,7 +7,14 @@ bugfixes don't force a multi-hour Firefox rebuild.
|
|||
from __future__ import annotations
|
||||
|
||||
# Bump this when a new patched Firefox build is released on GitHub.
|
||||
BINARY_VERSION: str = "firefox-3"
|
||||
BINARY_VERSION: str = "firefox-9"
|
||||
|
||||
# Releases known to be broken — ensure_binary() refuses them with a clear error
|
||||
# instead of handing the user an unusable binary. firefox-8 was packaged without
|
||||
# the juggler automation layer, so Playwright cannot drive it (TargetClosedError);
|
||||
# fixed in firefox-9 (package-manifest.in now ships chrome/juggler). A cached
|
||||
# firefox-8 from before the bump would otherwise keep being used silently.
|
||||
BROKEN_VERSIONS: frozenset[str] = frozenset({"firefox-8"})
|
||||
|
||||
# Underlying Firefox version (for display only; does not drive downloads).
|
||||
FIREFOX_UPSTREAM_VERSION: str = "150.0.1"
|
||||
|
|
@ -19,13 +26,15 @@ BINARY_BASENAME: str = f"firefox-{FIREFOX_UPSTREAM_VERSION}-stealth"
|
|||
def ARCHIVE_NAME(platform_key: str, machine: str) -> str:
|
||||
"""Return the platform-specific archive filename.
|
||||
|
||||
platform_key: sys.platform ("win32", "linux")
|
||||
machine: platform.machine() ("AMD64", "x86_64", ...)
|
||||
platform_key: sys.platform ("win32", "linux", "darwin")
|
||||
machine: platform.machine() ("AMD64", "x86_64", "arm64", "aarch64", ...)
|
||||
"""
|
||||
pk = platform_key.lower()
|
||||
m = machine.lower()
|
||||
if m in {"amd64", "x86_64"}:
|
||||
arch = "x86_64"
|
||||
elif m in {"arm64", "aarch64"}:
|
||||
arch = "arm64"
|
||||
else:
|
||||
raise NotImplementedError(f"unsupported arch: {machine}")
|
||||
|
||||
|
|
@ -33,16 +42,39 @@ def ARCHIVE_NAME(platform_key: str, machine: str) -> str:
|
|||
return f"{BINARY_BASENAME}-win-{arch}.zip"
|
||||
if pk == "linux":
|
||||
return f"{BINARY_BASENAME}-linux-{arch}.tar.gz"
|
||||
if pk == "darwin":
|
||||
return f"{BINARY_BASENAME}-macos-{arch}.tar.gz"
|
||||
raise NotImplementedError(f"unsupported platform: {platform_key}")
|
||||
|
||||
|
||||
# Binary entry point relative path inside the extracted archive root.
|
||||
# macOS ships the .app bundle (renamed to a stable "Firefox.app" by release.yml);
|
||||
# the wrapper execs the inner binary directly, which sidesteps Gatekeeper.
|
||||
BINARY_ENTRY_REL = {
|
||||
"win32": "firefox.exe",
|
||||
"linux": "firefox",
|
||||
"darwin": "Firefox.app/Contents/MacOS/firefox",
|
||||
}
|
||||
|
||||
# GitHub release URL template; {tag} and {asset} are filled in when the download URL is built.
|
||||
RELEASE_URL_TEMPLATE = (
|
||||
"https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}"
|
||||
)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# GeoIP database (timezone="auto" → resolve IANA zone from proxy egress IP)
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# daijro/geoip-all-in-one merges IP2Location LITE + GeoLite2 + DB-IP into a
|
||||
# single mmdb (country ISO + coordinates + IANA timezone via tzfpy), rebuilt
|
||||
# weekly. GPL-3.0, so we DOWNLOAD it at runtime into the user cache (like the
|
||||
# Firefox binary) rather than bundling it into this MIT package. The `-all`
|
||||
# variant covers IPv4+IPv6. download.py tracks the LATEST release and refreshes
|
||||
# weekly; GEOIP_MMDB_VERSION is only the cold-cache fallback when the GitHub
|
||||
# API is unreachable on a machine that has never downloaded the DB.
|
||||
GEOIP_REPO: str = "daijro/geoip-all-in-one"
|
||||
GEOIP_MMDB_VERSION: str = "2026.06.03"
|
||||
GEOIP_ASSET: str = "geoip-aio-all.mmdb.zip"
|
||||
GEOIP_MMDB_NAME: str = "geoip-aio-all.mmdb"
|
||||
GEOIP_RELEASE_URL_TEMPLATE: str = (
|
||||
"https://github.com/daijro/geoip-all-in-one/releases/download/{tag}/{asset}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -5,9 +5,12 @@ import hashlib
|
|||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tarfile
|
||||
import tempfile
|
||||
import time
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -18,6 +21,11 @@ from .constants import (
|
|||
ARCHIVE_NAME,
|
||||
BINARY_ENTRY_REL,
|
||||
BINARY_VERSION,
|
||||
BROKEN_VERSIONS,
|
||||
GEOIP_ASSET,
|
||||
GEOIP_MMDB_NAME,
|
||||
GEOIP_MMDB_VERSION,
|
||||
GEOIP_RELEASE_URL_TEMPLATE,
|
||||
RELEASE_URL_TEMPLATE,
|
||||
)
|
||||
|
||||
|
|
@ -96,7 +104,9 @@ def _parse_checksums(text: str) -> dict[str, str]:
|
|||
continue
|
||||
parts = line.split()
|
||||
if len(parts) >= 2:
|
||||
out[parts[-1]] = parts[0]
|
||||
# sha256sum uses ' *' or ' ' prefix for binary vs text mode
|
||||
key = parts[-1].lstrip("*")
|
||||
out[key] = parts[0]
|
||||
return out
|
||||
|
||||
|
||||
|
|
@ -112,8 +122,39 @@ def _extract(archive: Path, dst: Path) -> None:
|
|||
raise RuntimeError(f"unknown archive format: {archive}")
|
||||
|
||||
|
||||
def _post_extract_darwin(app_root: Path, entry: Path) -> None:
|
||||
"""Make an ad-hoc-signed .app launchable on macOS.
|
||||
|
||||
The .app is downloaded via requests (no Finder quarantine attached), but we
|
||||
strip com.apple.quarantine defensively and ensure the inner binary is
|
||||
executable. We exec the inner binary directly (not via LaunchServices), so
|
||||
Gatekeeper's first-launch prompt does not apply; the ad-hoc signature
|
||||
(applied in release.yml) is what lets the arm64 Mach-O run at all.
|
||||
"""
|
||||
app = app_root
|
||||
# walk up to the .app bundle dir if entry points inside it
|
||||
for parent in entry.parents:
|
||||
if parent.name.endswith(".app"):
|
||||
app = parent
|
||||
break
|
||||
try:
|
||||
subprocess.run(["xattr", "-dr", "com.apple.quarantine", str(app)], check=False)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
try:
|
||||
entry.chmod(0o755)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def ensure_binary(version: str = BINARY_VERSION) -> Path:
|
||||
"""Return a path to a runnable Firefox executable. Download if needed."""
|
||||
if version in BROKEN_VERSIONS:
|
||||
raise RuntimeError(
|
||||
f"{version} is a known-broken release (the juggler automation layer is "
|
||||
f"missing, so Playwright cannot drive it). Upgrade invisible_playwright "
|
||||
f"(current BINARY_VERSION={BINARY_VERSION}) or pass a newer version."
|
||||
)
|
||||
plat = sys.platform
|
||||
mach = platform.machine()
|
||||
asset = ARCHIVE_NAME(plat, mach)
|
||||
|
|
@ -146,6 +187,142 @@ def ensure_binary(version: str = BINARY_VERSION) -> Path:
|
|||
)
|
||||
_extract(archive_path, version_dir)
|
||||
|
||||
if plat == "darwin":
|
||||
_post_extract_darwin(version_dir, entry)
|
||||
|
||||
if not entry.exists():
|
||||
raise RuntimeError(f"binary not found after extraction: {entry}")
|
||||
return entry
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# GeoIP mmdb (timezone="auto" → map egress IP → IANA zone)
|
||||
#
|
||||
# daijro/geoip-all-in-one is rebuilt WEEKLY, so we don't pin a tag. We cache
|
||||
# the latest mmdb and, once it's older than GEOIP_REFRESH_DAYS, re-check the
|
||||
# latest release and pull a newer build if one exists. Net effect: no download
|
||||
# (not even an API call) on a launch within the window; auto-refresh after it;
|
||||
# a stale cache is reused when offline rather than breaking the launch.
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
GEOIP_REFRESH_DAYS = 7 # matches daijro's weekly rebuild cadence
|
||||
|
||||
|
||||
def _geoip_root() -> Path:
    """Return the ``geoip`` subdirectory of the package cache root."""
    base = cache_root()
    return base / "geoip"
|
||||
|
||||
|
||||
def _geoip_check_marker() -> Path:
    """Return the path of the ``.last_check`` marker file inside the geoip cache."""
    geoip_dir = _geoip_root()
    return geoip_dir / ".last_check"
|
||||
|
||||
|
||||
def _cached_geoip_mmdb() -> Path | None:
    """Return the newest cached mmdb across tag directories, or ``None``.

    Tag directories are date strings (e.g. ``2026.06.03``), so the
    lexically greatest path is also the most recent one.
    """
    geoip_dir = _geoip_root()
    if geoip_dir.exists():
        # Paths are distinct, so max() picks the same entry as sorted()[-1].
        return max(geoip_dir.glob("*/*.mmdb"), default=None)
    return None
|
||||
|
||||
|
||||
def _geoip_cache_fresh(max_age_days: int) -> bool:
    """Tell whether the last-check marker is younger than ``max_age_days``.

    Returns ``False`` when the marker file does not exist.
    """
    marker = _geoip_check_marker()
    if marker.exists():
        age_seconds = time.time() - marker.stat().st_mtime
        return age_seconds < max_age_days * 86400
    return False
|
||||
|
||||
|
||||
def _touch_geoip_marker() -> None:
    """Create or refresh the last-check marker, creating its directory if needed."""
    marker = _geoip_check_marker()
    marker.parent.mkdir(parents=True, exist_ok=True)
    marker.touch()
|
||||
|
||||
|
||||
def _latest_geoip_tag() -> str:
    """Fetch the latest release tag of ``GEOIP_REPO`` from the GitHub API.

    Sends an ``Authorization`` header when ``_github_token()`` yields a token.

    Raises:
        RuntimeError: if the response carries no ``tag_name``.
        Exception: whatever ``requests`` raises for transport or HTTP errors
            (via ``raise_for_status``).
    """
    request_headers = {"Accept": "application/vnd.github+json"}
    gh_token = _github_token()
    if gh_token:
        request_headers["Authorization"] = f"token {gh_token}"

    endpoint = f"https://api.github.com/repos/{GEOIP_REPO}/releases/latest"
    response = requests.get(endpoint, headers=request_headers, timeout=15)
    response.raise_for_status()

    tag_name = response.json().get("tag_name")
    if tag_name:
        return tag_name
    raise RuntimeError("no tag_name in geoip-all-in-one latest release")
|
||||
|
||||
|
||||
def _download_geoip_tag(tag: str) -> Path:
    """Download + extract a specific tag's mmdb if not already cached.

    The cache lookup accepts either ``GEOIP_MMDB_NAME`` or, failing that, the
    first ``*.mmdb`` (sorted) in the tag directory, because the file inside
    the zip may be named differently from ``GEOIP_MMDB_NAME``.

    Args:
        tag: release tag; also the name of the cache subdirectory.

    Returns:
        Path of the cached mmdb for ``tag``.

    Raises:
        RuntimeError: if no ``*.mmdb`` exists in the tag directory after
            extraction.
        Exception: whatever ``_download_file`` / ``_extract`` raise.
    """
    dst_dir = _geoip_root() / tag
    target = dst_dir / GEOIP_MMDB_NAME

    def _lookup():
        # Preferred name first, then any mmdb that a previous extraction left.
        if target.exists():
            return target
        found = sorted(dst_dir.glob("*.mmdb")) if dst_dir.is_dir() else []
        return found[0] if found else None

    # Checking the fallback name BEFORE downloading fixes a redundant
    # re-download: previously only ``target`` was checked, so an archive whose
    # mmdb had a different name was fetched again on every call.
    cached = _lookup()
    if cached is not None:
        return cached

    url = GEOIP_RELEASE_URL_TEMPLATE.format(tag=tag, asset=GEOIP_ASSET)
    dst_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as td:
        archive = Path(td) / GEOIP_ASSET
        _download_file(url, archive)
        _extract(archive, dst_dir)

    extracted = _lookup()
    if extracted is None:
        raise RuntimeError(f"geoip mmdb not found after extraction in {dst_dir}")
    return extracted
|
||||
|
||||
|
||||
def _prune_old_geoip_tags(keep: str) -> None:
    """Delete every cached tag directory except ``keep`` to bound disk usage.

    Non-directory entries in the geoip cache root (such as the last-check
    marker file) are left alone; removal errors are ignored.
    """
    geoip_dir = _geoip_root()
    if geoip_dir.exists():
        for entry in geoip_dir.iterdir():
            if entry.name == keep or not entry.is_dir():
                continue
            shutil.rmtree(entry, ignore_errors=True)
|
||||
|
||||
|
||||
def geoip_mmdb_path() -> Path | None:
    """Return the currently cached mmdb (newest tag), or ``None`` when nothing is cached."""
    newest = _cached_geoip_mmdb()
    return newest
|
||||
|
||||
|
||||
def ensure_geoip_mmdb(max_age_days: int = GEOIP_REFRESH_DAYS) -> Path:
    """Return a geoip mmdb, kept fresh against daijro's weekly rebuild.

    Resolution order:
      1. ``STEALTHFOX_GEOIP_MMDB`` env → use that file (user-supplied / test).
      2. A cached mmdb younger than ``max_age_days`` → use it (no network).
      3. Else ask GitHub for the latest tag, download it if not already cached,
         prune older tags, and reset the freshness timer.
      4. If the API/download is unreachable but a cached mmdb exists → use it
         (and reset the timer so we don't hammer the API while offline).
      5. Cold cache + API down → fall back to the pinned ``GEOIP_MMDB_VERSION``;
         if that download also fails, raise.

    Args:
        max_age_days: how long a cached mmdb is used without re-checking.

    Returns:
        Path to a usable mmdb file.

    Raises:
        RuntimeError: if ``STEALTHFOX_GEOIP_MMDB`` points to a missing file.
        Exception: the download error, when nothing is cached to fall back on.
    """
    override = os.environ.get("STEALTHFOX_GEOIP_MMDB")
    if override:
        p = Path(override)
        if not p.exists():
            raise RuntimeError(f"STEALTHFOX_GEOIP_MMDB points to a missing file: {p}")
        return p

    cached = _cached_geoip_mmdb()
    if cached is not None and _geoip_cache_fresh(max_age_days):
        return cached

    try:
        tag = _latest_geoip_tag()
    except Exception:
        if cached is not None:
            _touch_geoip_marker()  # recheck after the window; don't hammer
            return cached
        tag = GEOIP_MMDB_VERSION  # cold cache + API down → pinned fallback

    try:
        mmdb = _download_geoip_tag(tag)
    except Exception:
        # Step 4 also covers a failed download: previously only the tag lookup
        # was guarded, so a download error broke the launch even though a
        # stale-but-usable mmdb was cached.
        if cached is None:
            raise
        _touch_geoip_marker()
        return cached

    _prune_old_geoip_tags(mmdb.parent.name)
    _touch_geoip_marker()
    return mmdb
|
||||
|
|
|
|||
|
|
@ -2,11 +2,13 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
from playwright.sync_api import Browser, Playwright, sync_playwright
|
||||
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
||||
|
||||
from ._fpforge import Profile, generate_profile
|
||||
from ._geo import resolve_session_timezone
|
||||
from ._headless import make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from .download import ensure_binary
|
||||
|
|
@ -111,6 +113,8 @@ class InvisiblePlaywright:
|
|||
timezone: str = "",
|
||||
extra_prefs: Optional[Dict[str, Any]] = None,
|
||||
binary_path: Optional[str] = None,
|
||||
profile_dir: Optional[Union[str, Path]] = None,
|
||||
prep_recaptcha: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Args:
|
||||
|
|
@ -132,11 +136,26 @@ class InvisiblePlaywright:
|
|||
a float caps the motion in seconds.
|
||||
locale: BCP-47 tag (e.g. ``"en-US"``). Drives the
|
||||
``Accept-Language`` header and ``navigator.language``.
|
||||
timezone: IANA timezone (e.g. ``"America/New_York"``). Empty
|
||||
means use the host TZ.
|
||||
timezone: IANA zone (e.g. ``"America/New_York"``) — used as-is
|
||||
when set, the only way to force a specific zone. ``""``
|
||||
(default) or ``"auto"`` ALWAYS resolves from the egress IP:
|
||||
through the proxy when one is set, otherwise from the host's
|
||||
own public IP (one lookup + an offline mmdb). On failure: with
|
||||
a proxy it raises (a foreign proxy on the host TZ is the
|
||||
``timezone_mismatch`` signal); without a proxy it falls back to
|
||||
the host TZ so a transient lookup failure can't break launch.
|
||||
extra_prefs: Optional dict of Firefox prefs overlayed on top
|
||||
of the generated profile — useful for niche tweaks
|
||||
without monkey-patching the package.
|
||||
profile_dir: Path to a persistent Firefox profile directory.
|
||||
When set, the session uses ``launch_persistent_context()``
|
||||
so cookies, localStorage, sessionStorage, extensions, cache
|
||||
and prefs are kept on disk between runs. ``__enter__``
|
||||
returns a ``BrowserContext`` (not a ``Browser``) — use it
|
||||
directly: ``with InvisiblePlaywright(profile_dir=p) as ctx:
|
||||
page = ctx.new_page()``. First run creates the dir;
|
||||
subsequent runs reuse it. Pair with a stable ``seed=`` to
|
||||
also pin the fingerprint identity across runs.
|
||||
"""
|
||||
# Constrain to int31 — Firefox's `zoom.stealth.fpp.hw_seed` and
|
||||
# related stealth prefs are declared as ``int32_t`` in
|
||||
|
|
@ -154,12 +173,22 @@ class InvisiblePlaywright:
|
|||
self._timezone = timezone
|
||||
self._extra_prefs = extra_prefs
|
||||
self._binary_path = binary_path
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
# reCAPTCHA cookie pre-seed — opt-in. Gated server-side: if a
|
||||
# persistent profile_dir is in use, respect its existing cookies
|
||||
# and DON'T enable pre-seed (the profile owns its own state).
|
||||
self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None
|
||||
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
||||
self._pw: Optional[Playwright] = None
|
||||
self._browser: Optional[Browser] = None
|
||||
self._persistent_context: Optional[BrowserContext] = None
|
||||
self._virtual_display: Any = None
|
||||
|
||||
def __enter__(self) -> Browser:
|
||||
def __enter__(self) -> Union[Browser, BrowserContext]:
|
||||
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||
# concrete IANA zone before anything reads self._timezone. Fail-early
|
||||
# if a proxy is set but the egress zone can't be resolved.
|
||||
self._timezone = resolve_session_timezone(self._timezone, self._proxy)
|
||||
executable = self._binary_path or ensure_binary()
|
||||
prefs = self._build_prefs()
|
||||
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
||||
|
|
@ -168,6 +197,25 @@ class InvisiblePlaywright:
|
|||
|
||||
try:
|
||||
self._pw = sync_playwright().start()
|
||||
if self._profile_dir is not None:
|
||||
# Persistent context — cookies / localStorage / extensions /
|
||||
# prefs all live on disk between runs. Stealth prefs are
|
||||
# re-injected via firefox_user_prefs on every launch (Playwright
|
||||
# writes them to user.js, which overrides anything in
|
||||
# prefs.js inside the persistent dir).
|
||||
self._profile_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._persistent_context = self._pw.firefox.launch_persistent_context(
|
||||
user_data_dir=str(self._profile_dir),
|
||||
executable_path=str(executable),
|
||||
headless=pw_headless,
|
||||
firefox_user_prefs=prefs,
|
||||
proxy=playwright_proxy,
|
||||
args=self._extra_args,
|
||||
env=env,
|
||||
**self._persistent_context_kwargs(),
|
||||
)
|
||||
_patch_sync_new_page_sleep(self._persistent_context)
|
||||
return self._persistent_context
|
||||
self._browser = self._pw.firefox.launch(
|
||||
executable_path=str(executable),
|
||||
headless=pw_headless,
|
||||
|
|
@ -185,6 +233,22 @@ class InvisiblePlaywright:
|
|||
self._patch_new_context_defaults(self._browser)
|
||||
return self._browser
|
||||
|
||||
def _persistent_context_kwargs(self) -> Dict[str, Any]:
|
||||
"""Context-level kwargs accepted by launch_persistent_context.
|
||||
|
||||
Identical to ``_default_context_kwargs``: viewport / screen / DPR /
|
||||
color-scheme / locale / timezone_id. Up to firefox-4 we had to drop
|
||||
locale and timezone_id because Playwright's per-realm overrides
|
||||
called IDL methods (``docShell.languageOverride``,
|
||||
``docShell.overrideTimezone``) that weren't exposed by our patched
|
||||
build, causing launch_persistent_context to hang for 180s. From
|
||||
firefox-5 (C7 closure), the C++ ``overrideTimezone`` method is
|
||||
present and ``languageOverride`` was already there, so the
|
||||
per-realm overrides land and the persistent context starts in
|
||||
~20s like the non-persistent path.
|
||||
"""
|
||||
return self._default_context_kwargs()
|
||||
|
||||
def _patch_new_context_defaults(self, browser: Browser) -> None:
|
||||
"""Wrap ``browser.new_context`` so its defaults derive from the
|
||||
profile (viewport, screen, DPR, color-scheme). Users get a
|
||||
|
|
@ -192,12 +256,18 @@ class InvisiblePlaywright:
|
|||
"""
|
||||
original = browser.new_context
|
||||
defaults = self._default_context_kwargs()
|
||||
prep = self._prep_recaptcha
|
||||
profile = self._profile # pass the whole Profile (seed + browsing_history)
|
||||
tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region
|
||||
|
||||
def patched(**kw):
|
||||
merged = dict(defaults)
|
||||
merged.update(kw) # user-supplied wins
|
||||
ctx = original(**merged)
|
||||
_patch_sync_new_page_sleep(ctx)
|
||||
if prep:
|
||||
from ._recaptcha_seed import seed_recaptcha_cookies_sync
|
||||
seed_recaptcha_cookies_sync(ctx, profile, timezone=tz)
|
||||
return ctx
|
||||
|
||||
browser.new_context = patched # type: ignore[assignment]
|
||||
|
|
@ -226,6 +296,12 @@ class InvisiblePlaywright:
|
|||
self._teardown()
|
||||
|
||||
def _teardown(self) -> None:
|
||||
if self._persistent_context is not None:
|
||||
try:
|
||||
self._persistent_context.close()
|
||||
except Exception:
|
||||
pass
|
||||
self._persistent_context = None
|
||||
if self._browser is not None:
|
||||
try:
|
||||
self._browser.close()
|
||||
|
|
|
|||
|
|
@ -289,13 +289,29 @@ _BASELINE: Dict[str, Any] = {
|
|||
"network.dns.echconfig.enabled": False,
|
||||
"network.dns.use_https_rr_as_altsvc": False,
|
||||
|
||||
# === A/B VARIANT B: Fission disabled ===
|
||||
# Force single content-process model (e10s only, no BC outer/inner split).
|
||||
# Diagnostic for the FF150 BC-swap theory: if peet_ws/fppro/sannysoft
|
||||
# work with this off, the Juggler FF146 baseline breaks specifically on
|
||||
# cross-process navigation tracking.
|
||||
# === Fission / site-isolation disabled (FF146 Playwright parity) ===
|
||||
# Force a single content-process model. Three knobs are required in FF150:
|
||||
# upstream Playwright Firefox (FF146-based) only needed fission.autostart=False
|
||||
# because FF146's default isolation strategy was looser. FF150 ships with
|
||||
# fission.webContentIsolationStrategy=1 (IsolateEverything) which still
|
||||
# site-isolates cross-origin iframes into separate `webIsolated` content
|
||||
# processes EVEN WHEN fission.autostart is False. From the parent process's
|
||||
# point of view, those iframes get a Juggler Frame placeholder with no
|
||||
# docShell, no URL, and an execution context that wraps the wrong global,
|
||||
# so frame.evaluate() fails with cross-origin SOP errors and
|
||||
# element_handle.content_frame() returns None.
|
||||
#
|
||||
# Pinning the strategy to 0 keeps every cross-origin web iframe in the
|
||||
# parent's content process, where the Juggler code paths from the FF146
|
||||
# era expect them. processCount.webIsolated=1 is kept as belt-and-suspenders
|
||||
# in case some path still classifies an origin as webIsolated despite the
|
||||
# strategy change. It costs nothing to leave.
|
||||
#
|
||||
# See issue #20 + tests/test_cross_origin_iframe.py for the regression
|
||||
# sentinel that catches a future A/B flipping these back.
|
||||
"fission.autostart": False,
|
||||
"fission.autostart.session": False,
|
||||
"fission.webContentIsolationStrategy": 0, # IsolateNothing
|
||||
"dom.ipc.processCount.webIsolated": 1,
|
||||
|
||||
|
||||
|
|
@ -384,6 +400,21 @@ _WIN_VIRT_DESKTOP_WORKAROUNDS: Dict[str, Any] = {
|
|||
# Bugzilla refs: 1798091, 1524591, 1229829. Lowering the GPU sandbox to 0
|
||||
# restores hardware compositor + functional WebGL on alt desktops.
|
||||
"security.sandbox.gpu.level": 0,
|
||||
# Same root cause as above, content process side. Wrapper repo issue #18
|
||||
# (tab crash on cross-process navigation under headless=True). Sandbox
|
||||
# content level > 4 puts content processes on the sandbox's own
|
||||
# kAlternateWinstation (see security/sandbox/win/src/sandboxbroker/
|
||||
# sandboxBroker.cpp line 1113-1114:
|
||||
# `if (aSandboxLevel > 4) config->SetDesktop(kAlternateWinstation)`).
|
||||
# Combined with our CreateDesktop alt-desktop, that puts browser process
|
||||
# and content processes on DIFFERENT desktops. Cross-process navigation
|
||||
# then fails window parenting between parent and child, the content
|
||||
# process exits cleanly (exitCode=0, signal=null) and Playwright fires
|
||||
# page.on('crash') ~10s after page load. Lowering content sandbox to 4
|
||||
# keeps content processes on the same desktop as the browser process,
|
||||
# which is what we want here (still tight enough — level 4 blocks
|
||||
# file/registry write, network calls, hardware access).
|
||||
"security.sandbox.content.level": 4,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,12 @@
|
|||
import os
|
||||
import random
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright._fpforge import generate_profile
|
||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -15,3 +19,36 @@ def deterministic_rng():
|
|||
def sample_profile():
|
||||
"""A Profile generated from seed=42 for reuse across tests."""
|
||||
return generate_profile(seed=42)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def firefox_binary():
    """Resolve the patched Firefox executable used by E2E tests, or skip.

    This is the one shared lookup for the whole E2E suite, so every test
    honours ``INVPW_BINARY_PATH`` the same way. Resolution order:

    1. ``INVPW_BINARY_PATH`` — when set, it is authoritative: the path is
       returned if it exists, otherwise the test is skipped (the cache is
       NOT consulted as a fallback).
    2. The cached binary under ``cache_dir_for_version()`` joined with the
       platform's entry in ``BINARY_ENTRY_REL``.
    3. Skip — a test run never triggers an implicit download.

    Returns:
        The binary path as a ``str``.
    """
    override = os.environ.get("INVPW_BINARY_PATH")
    if override and not Path(override).exists():
        pytest.skip(f"INVPW_BINARY_PATH={override!r} does not exist")
    if override:
        return override

    platform_key = sys.platform
    if platform_key not in BINARY_ENTRY_REL:
        pytest.skip(f"unsupported platform: {platform_key}")

    # Imported lazily, only once we know the platform is supported.
    from invisible_playwright.download import cache_dir_for_version

    cached_entry = cache_dir_for_version() / BINARY_ENTRY_REL[platform_key]
    if cached_entry.exists():
        return str(cached_entry)

    pytest.skip(
        "patched Firefox binary not cached and INVPW_BINARY_PATH unset; "
        "set INVPW_BINARY_PATH=<firefox binary> or run `invisible-playwright fetch`"
    )
|
||||
|
|
|
|||
|
|
@ -1,6 +1,28 @@
|
|||
import pytest
|
||||
|
||||
from invisible_playwright.constants import ARCHIVE_NAME, BINARY_BASENAME, BINARY_VERSION
|
||||
from invisible_playwright.constants import (
|
||||
ARCHIVE_NAME,
|
||||
BINARY_BASENAME,
|
||||
BINARY_ENTRY_REL,
|
||||
BINARY_VERSION,
|
||||
BROKEN_VERSIONS,
|
||||
FIREFOX_UPSTREAM_VERSION,
|
||||
RELEASE_URL_TEMPLATE,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_broken_versions_excludes_current():
|
||||
"""The current BINARY_VERSION must NEVER be in BROKEN_VERSIONS — otherwise
|
||||
every default ensure_binary() call would raise and the wrapper is unusable."""
|
||||
assert BINARY_VERSION not in BROKEN_VERSIONS
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_firefox_8_is_marked_broken():
|
||||
"""firefox-8 shipped without the juggler layer (undrivable by Playwright);
|
||||
it must stay flagged so a stale cache can't silently hand it to a user."""
|
||||
assert "firefox-8" in BROKEN_VERSIONS
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
|
|
@ -24,12 +46,158 @@ def test_archive_name_linux():
|
|||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_archive_name_unsupported_raises():
|
||||
def test_archive_name_macos_arm64():
|
||||
name = ARCHIVE_NAME("darwin", "arm64")
|
||||
assert name.endswith(".tar.gz")
|
||||
assert "macos-arm64" in name
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_archive_name_truly_unsupported_raises():
|
||||
with pytest.raises(NotImplementedError):
|
||||
ARCHIVE_NAME("darwin", "arm64")
|
||||
ARCHIVE_NAME("plan9", "x86_64")
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_binary_basename_format():
|
||||
assert "firefox" in BINARY_BASENAME.lower()
|
||||
assert "stealth" in BINARY_BASENAME.lower()
|
||||
|
||||
|
||||
# ---- Comprehensive ARCHIVE_NAME edge cases -------------------------------- #
|
||||
# Same risk shape as bug #15: a missed format assumption (sha256sum binary
|
||||
# mode) silently produced wrong output. Same class of bug here would be
|
||||
# uppercase platform string or odd machine value passing through to a
|
||||
# wrong-named asset on the CDN and 404-ing.
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("platform_key,machine,expected_substring", [
|
||||
("win32", "AMD64", "win-x86_64.zip"), # Windows reports AMD64
|
||||
("win32", "amd64", "win-x86_64.zip"), # lowercase variant
|
||||
("win32", "x86_64", "win-x86_64.zip"), # mingw-style
|
||||
("linux", "x86_64", "linux-x86_64.tar.gz"), # standard Linux
|
||||
("linux", "AMD64", "linux-x86_64.tar.gz"), # odd but plausible
|
||||
("Linux", "x86_64", "linux-x86_64.tar.gz"), # case-insensitive platform
|
||||
("WIN32", "AMD64", "win-x86_64.zip"), # ALL CAPS platform
|
||||
])
|
||||
def test_archive_name_accepts_case_variations(platform_key, machine, expected_substring):
|
||||
"""sys.platform / platform.machine() return inconsistent casing across
|
||||
OS versions and Python versions. The asset filename must be stable
|
||||
regardless — otherwise the CDN 404s."""
|
||||
assert ARCHIVE_NAME(platform_key, machine).endswith(expected_substring)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("machine", ["i386", "i686", "ppc64le", "armv7l", "riscv64"])
|
||||
def test_archive_name_rejects_unsupported_arches(machine):
|
||||
"""Unsupported arches must raise NotImplementedError with the bad value
|
||||
in the message — silent fallback to a default arch would download the
|
||||
wrong binary, run, and fingerprint differently."""
|
||||
with pytest.raises(NotImplementedError, match=machine):
|
||||
ARCHIVE_NAME("linux", machine)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("machine", ["arm64", "aarch64"])
|
||||
def test_archive_name_arm64_supported(machine):
|
||||
"""ARM64 is shipped now (issue #6): both Linux aarch64 and macOS arm64.
|
||||
ARCHIVE_NAME must map both machine spellings to the canonical -arm64 asset."""
|
||||
assert ARCHIVE_NAME("linux", machine) == "firefox-150.0.1-stealth-linux-arm64.tar.gz"
|
||||
assert ARCHIVE_NAME("darwin", machine) == "firefox-150.0.1-stealth-macos-arm64.tar.gz"
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("platform_key", ["freebsd", "cygwin", "openbsd"])
|
||||
def test_archive_name_rejects_unsupported_platforms(platform_key):
|
||||
"""win32/linux/darwin are supported; other platforms must raise, not
|
||||
silently pick one of the three."""
|
||||
with pytest.raises(NotImplementedError, match=platform_key):
|
||||
ARCHIVE_NAME(platform_key, "x86_64")
|
||||
|
||||
|
||||
# ---- ARCHIVE_NAME ↔ BINARY_ENTRY_REL invariant ---------------------------- #
|
||||
# For every supported platform there MUST be an entry in BINARY_ENTRY_REL,
|
||||
# otherwise ensure_binary() will raise NotImplementedError AFTER having
|
||||
# already downloaded a 110 MB tarball — terrible UX.
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_binary_entry_rel_covers_every_supported_platform():
|
||||
"""If ARCHIVE_NAME accepts a platform key, BINARY_ENTRY_REL must declare
|
||||
where the executable lives inside the archive for it."""
|
||||
for plat in ["win32", "linux", "darwin"]:
|
||||
ARCHIVE_NAME(plat, "x86_64") # must not raise
|
||||
assert plat in BINARY_ENTRY_REL, (
|
||||
f"ARCHIVE_NAME accepts {plat!r} but BINARY_ENTRY_REL has no entry "
|
||||
f"— ensure_binary() will fail late after a 110 MB download."
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_binary_entry_rel_extension_matches_platform():
|
||||
"""firefox.exe on Windows, plain `firefox` on Linux, and the app-bundle binary on macOS."""
|
||||
assert BINARY_ENTRY_REL["win32"].endswith(".exe")
|
||||
assert not BINARY_ENTRY_REL["linux"].endswith(".exe")
|
||||
assert BINARY_ENTRY_REL["linux"] == "firefox"
|
||||
assert BINARY_ENTRY_REL["darwin"].endswith(".app/Contents/MacOS/firefox")
|
||||
|
||||
|
||||
# ---- RELEASE_URL_TEMPLATE shape ------------------------------------------- #
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_release_url_template_is_https():
|
||||
"""No http://. GitHub redirects http but we never accept the redirect."""
|
||||
assert RELEASE_URL_TEMPLATE.startswith("https://github.com/")
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_release_url_template_has_required_placeholders():
|
||||
"""{tag} and {asset} must both be present, otherwise _resolve_asset_url
|
||||
won't format a usable URL and downloads fail with confusing 404s."""
|
||||
assert "{tag}" in RELEASE_URL_TEMPLATE
|
||||
assert "{asset}" in RELEASE_URL_TEMPLATE
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_release_url_template_formats_cleanly():
|
||||
"""Confirm .format() actually substitutes — catches typos like {tags}."""
|
||||
url = RELEASE_URL_TEMPLATE.format(tag="firefox-99", asset="thing.zip")
|
||||
assert "{" not in url and "}" not in url
|
||||
assert "firefox-99" in url
|
||||
assert "thing.zip" in url
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_release_url_points_at_owned_repo():
|
||||
"""The template MUST point at an owner/repo the maintainer actually
|
||||
controls. A typo here would direct everyone's downloads at a stranger's
|
||||
GitHub account — silent supply-chain risk."""
|
||||
assert "/feder-cr/invisible_playwright/" in RELEASE_URL_TEMPLATE, (
|
||||
f"RELEASE_URL_TEMPLATE was changed to point elsewhere: "
|
||||
f"{RELEASE_URL_TEMPLATE!r}. Update this test only if the move is intentional."
|
||||
)
|
||||
|
||||
|
||||
# ---- Firefox upstream version sanity -------------------------------------- #
|
||||
|
||||
@pytest.mark.unit
def test_firefox_upstream_version_is_three_part_semver():
    """FIREFOX_UPSTREAM_VERSION must be a dotted, all-numeric version string.

    Despite the test name, the check accepts two or more segments, not
    strictly three; every segment must consist of digits only.
    """
    segments = FIREFOX_UPSTREAM_VERSION.split(".")
    assert len(segments) >= 2, f"version too short: {FIREFOX_UPSTREAM_VERSION!r}"
    for segment in segments:
        assert segment.isdigit(), f"non-numeric segment in {FIREFOX_UPSTREAM_VERSION!r}"
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_binary_basename_includes_upstream_version():
|
||||
"""The basename references the upstream version, so the asset filename
|
||||
on the CDN encodes which Firefox was patched. Bumping FIREFOX_UPSTREAM_VERSION
|
||||
without rebuilding would leave stale binaries — this guards against
|
||||
accidentally desyncing the two."""
|
||||
assert FIREFOX_UPSTREAM_VERSION in BINARY_BASENAME
|
||||
|
||||
|
||||
@pytest.mark.unit
@pytest.mark.parametrize("plat", ["win32", "linux", "darwin"])
def test_archive_name_includes_upstream_version(plat):
    """Desync guard from the asset-name side.

    For every supported platform key, the archive filename must embed
    FIREFOX_UPSTREAM_VERSION, so bumping the constant without rebuilding the
    assets is caught. ``darwin`` is included because the other invariants in
    this file treat it as a supported platform alongside win32 and linux.
    """
    assert FIREFOX_UPSTREAM_VERSION in ARCHIVE_NAME(plat, "x86_64")
|
||||
|
|
|
|||
278
tests/test_cross_origin_iframe.py
Normal file
278
tests/test_cross_origin_iframe.py
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
"""Regression tests for cross-origin / cross-process iframe interaction.
|
||||
|
||||
History: wrapper repo issue #20 reported that a third-party cookie
|
||||
consent iframe was completely unreachable from Playwright in 0.1.7 —
|
||||
``element_handle.content_frame()`` returned ``None``, ``frame.evaluate()``
|
||||
threw cross-origin SOP errors, and ``frame_locator().click()`` timed
|
||||
out.
|
||||
|
||||
Root cause was a missing pref. FF150 ships with
|
||||
``fission.webContentIsolationStrategy=1`` (IsolateEverything), which
|
||||
site-isolates cross-origin iframes into separate webIsolated content
|
||||
processes even when ``fission.autostart=False``. The Juggler code paths
|
||||
inherited from the FF146 era assume same-process iframes. The wrapper's
|
||||
``_BASELINE`` now pins the pref to 0 (IsolateNothing).
|
||||
|
||||
These tests exist so a future Firefox upgrade or a fingerprint A/B
|
||||
that flips this pref by accident cannot ship without a red CI signal.
|
||||
|
||||
Layers:
|
||||
* ``unit`` — ``_BASELINE`` contains the pref with the right value. No browser.
|
||||
* ``e2e`` — launch the real binary against a LOCAL HTTP harness on
|
||||
``127.0.0.1`` (two ports = two SOP origins) and verify the
|
||||
four protocol operations that regressed: frame URL tracking,
|
||||
``handle.content_frame()``, ``frame.evaluate()``, and
|
||||
``frame_locator(...).locator(...)`` element resolution.
|
||||
|
||||
The e2e tests run entirely offline. They never call out to a real site;
|
||||
the cross-origin shape is reproduced with two local HTTP servers on
|
||||
random free ports.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import socket
|
||||
import threading
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright._fpforge import generate_profile
|
||||
from invisible_playwright.prefs import _BASELINE, translate_profile_to_prefs
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# Unit layer — fast, no browser, runs on every CI
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_baseline_pins_web_content_isolation_strategy_to_zero():
|
||||
"""Regression sentinel.
|
||||
|
||||
``fission.webContentIsolationStrategy`` MUST be 0 (IsolateNothing).
|
||||
The FF150 default is 1 (IsolateEverything), which site-isolates
|
||||
cross-origin iframes into separate webIsolated content processes
|
||||
and breaks Playwright frame tracking from the parent process.
|
||||
"""
|
||||
assert _BASELINE["fission.webContentIsolationStrategy"] == 0, (
|
||||
"fission.webContentIsolationStrategy must be 0 (IsolateNothing). "
|
||||
"If you bumped it for an A/B, cross-origin iframes will appear "
|
||||
"in page.frames with empty URLs and content_frame() will return "
|
||||
"None — see the changelog entry that introduced this test."
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_baseline_keeps_fission_autostart_off():
|
||||
"""Belt for the suspenders above. All three prefs are required."""
|
||||
assert _BASELINE["fission.autostart"] is False
|
||||
assert _BASELINE["fission.autostart.session"] is False
|
||||
assert _BASELINE["dom.ipc.processCount.webIsolated"] == 1
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translated_profile_propagates_isolation_strategy():
|
||||
"""The fix must survive translate_profile_to_prefs, not just live in _BASELINE."""
|
||||
p = generate_profile(seed=42)
|
||||
prefs = translate_profile_to_prefs(p)
|
||||
assert prefs["fission.webContentIsolationStrategy"] == 0
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_extra_prefs_override_can_break_isolation_only_explicitly():
|
||||
"""If a caller wants to A/B isolation, they have to set it explicitly.
|
||||
The wrapper does not silently flip it back on.
|
||||
"""
|
||||
p = generate_profile(seed=42)
|
||||
prefs_default = translate_profile_to_prefs(p)
|
||||
assert prefs_default["fission.webContentIsolationStrategy"] == 0
|
||||
|
||||
prefs_ab = translate_profile_to_prefs(
|
||||
p, extra_prefs={"fission.webContentIsolationStrategy": 1}
|
||||
)
|
||||
assert prefs_ab["fission.webContentIsolationStrategy"] == 1
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# E2E layer — needs cached binary + bind to localhost ports
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _free_port() -> int:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.bind(("127.0.0.1", 0))
|
||||
port = s.getsockname()[1]
|
||||
s.close()
|
||||
return port
|
||||
|
||||
|
||||
class _SilentHandler(BaseHTTPRequestHandler):
|
||||
"""Suppress per-request access logging so pytest output stays clean."""
|
||||
PAYLOAD = b"" # set per-instance via subclassing
|
||||
|
||||
def log_message(self, *_a):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/html; charset=utf-8")
|
||||
self.send_header("Cache-Control", "no-store")
|
||||
self.end_headers()
|
||||
self.wfile.write(self.PAYLOAD)
|
||||
|
||||
|
||||
def _serve(payload: bytes, port: int) -> HTTPServer:
    """Serve ``payload`` for every GET on 127.0.0.1:``port`` from a daemon thread.

    A one-off ``_SilentHandler`` subclass carries the payload as its
    ``PAYLOAD`` class attribute. The returned server is already running;
    stopping it is the caller's responsibility.
    """
    bound_handler = type("_H", (_SilentHandler,), {"PAYLOAD": payload})
    server = HTTPServer(("127.0.0.1", port), bound_handler)
    threading.Thread(target=server.serve_forever, daemon=True).start()
    return server
|
||||
|
||||
|
||||
@pytest.fixture
def cross_origin_harness():
    """Spin up TWO local HTTP servers on different localhost ports.

    Two ports = two distinct origins under SOP (same host, different port
    → different origin). The parent page on port A embeds three iframes
    (plain, sandboxed, titled) whose ``src`` points at port B. Same
    cross-origin browsing-context shape as a parent page with a third-party
    iframe, fully offline.

    Yields:
        A dict with ``parent_url`` (the page to navigate to) and
        ``child_origin`` (the origin the iframes are served from).

    Teardown stops each server's serve loop AND closes its listening socket;
    ``shutdown()`` alone leaves the socket open. Servers are tracked in a
    list so the first one is still cleaned up if the second fails to start.
    """
    pa, pb = _free_port(), _free_port()
    parent_html = f"""<!doctype html><html><head><title>parent</title></head><body>
<h1>parent</h1>
<iframe id="ifr_plain" src="http://127.0.0.1:{pb}/child" width="300" height="120"></iframe>
<iframe id="ifr_sandbox" src="http://127.0.0.1:{pb}/child" width="300" height="120"
sandbox="allow-scripts allow-same-origin"></iframe>
<iframe id="ifr_titled" src="http://127.0.0.1:{pb}/child" width="300" height="120"
title="cross-origin titled iframe"></iframe>
</body></html>""".encode("utf-8")
    child_html = b"""<!doctype html><html><body>
<button id="ok">confirm</button>
<button class="btn-primary">primary</button>
<script>document.getElementById('ok').addEventListener('click', () => document.title = 'clicked')</script>
</body></html>"""
    servers = []
    try:
        servers.append(_serve(parent_html, pa))
        servers.append(_serve(child_html, pb))
        yield {"parent_url": f"http://127.0.0.1:{pa}/", "child_origin": f"http://127.0.0.1:{pb}"}
    finally:
        for srv in servers:
            srv.shutdown()      # stop the serve_forever loop
            srv.server_close()  # release the listening socket
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_cross_origin_iframe_url_appears_in_page_frames(firefox_binary, cross_origin_harness):
|
||||
"""``page.frames`` must list the cross-origin iframe with its real URL.
|
||||
|
||||
Before the pref fix, the URL came back as '' because the navigation
|
||||
observer for the iframe fired in a different content process than
|
||||
the parent's FrameTree was registered in.
|
||||
"""
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
|
||||
ctx = browser.new_context()
|
||||
page = ctx.new_page()
|
||||
page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
|
||||
page.wait_for_selector("iframe#ifr_plain", timeout=10_000)
|
||||
page.wait_for_timeout(500)
|
||||
|
||||
urls = [f.url for f in page.frames]
|
||||
assert any(cross_origin_harness["child_origin"] in (u or "") for u in urls), (
|
||||
f"no frame had the child origin in its URL; page.frames urls = {urls!r}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_cross_origin_iframe_content_frame_resolves(firefox_binary, cross_origin_harness):
|
||||
"""``handle.content_frame()`` must return a Frame (not None) for every
|
||||
cross-origin iframe shape we care about: plain, sandboxed, titled.
|
||||
"""
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
|
||||
ctx = browser.new_context()
|
||||
page = ctx.new_page()
|
||||
page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
|
||||
page.wait_for_selector("iframe#ifr_plain", timeout=10_000)
|
||||
page.wait_for_timeout(500)
|
||||
|
||||
for sel in ("iframe#ifr_plain", "iframe#ifr_sandbox", "iframe#ifr_titled"):
|
||||
handle = page.query_selector(sel)
|
||||
assert handle is not None, f"{sel!r} not found in DOM"
|
||||
cf = handle.content_frame()
|
||||
assert cf is not None, f"{sel!r}: content_frame() returned None"
|
||||
assert cross_origin_harness["child_origin"] in (cf.url or ""), (
|
||||
f"{sel!r}: content_frame().url = {cf.url!r}, "
|
||||
f"expected child origin {cross_origin_harness['child_origin']!r}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.e2e
|
||||
def test_cross_origin_iframe_evaluate_returns_real_values(firefox_binary, cross_origin_harness):
|
||||
"""``frame.evaluate()`` inside the cross-origin iframe must work.
|
||||
|
||||
Pre-fix: every evaluate failed with a cross-origin SOP error because
|
||||
the iframe ended up with a stale/wrong execution context.
|
||||
"""
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
|
||||
ctx = browser.new_context()
|
||||
page = ctx.new_page()
|
||||
page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
|
||||
page.wait_for_selector("iframe#ifr_plain", timeout=10_000)
|
||||
page.wait_for_timeout(500)
|
||||
|
||||
cf = page.query_selector("iframe#ifr_plain").content_frame()
|
||||
assert cf is not None
|
||||
href = cf.evaluate("() => location.href")
|
||||
assert cross_origin_harness["child_origin"] in href
|
||||
title = cf.evaluate("() => document.title")
|
||||
assert isinstance(title, str)
|
||||
n_buttons = cf.evaluate("() => document.querySelectorAll('button').length")
|
||||
assert n_buttons == 2
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_cross_origin_iframe_frame_locator_resolves_button(firefox_binary, cross_origin_harness):
    """Buttons inside the iframe must be reachable via ``frame_locator(...).locator(...)``."""
    from invisible_playwright import InvisiblePlaywright

    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        page = browser.new_context().new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)

        # Each selector is expected to match exactly one element in the frame.
        for selector in ("button#ok", "button.btn-primary"):
            target = page.frame_locator("iframe#ifr_plain").locator(selector)
            cnt = target.count()
            assert cnt == 1, f"locator({selector!r}) found {cnt} elements (expected 1)"
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_cross_origin_iframe_dispatch_event_click_works(firefox_binary, cross_origin_harness):
    """End-to-end interaction via ``dispatch_event`` must succeed.

    Plain ``.click()`` can trip Playwright's actionability heuristic on
    some third-party UIs (same on vanilla Playwright Firefox — not our
    regression), but ``dispatch_event('click')`` always works once the
    iframe is reachable.
    """
    from invisible_playwright import InvisiblePlaywright

    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        ctx = browser.new_context()
        page = ctx.new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)

        page.frame_locator("iframe#ifr_plain").locator("button#ok").dispatch_event(
            "click", timeout=4_000
        )

        # query_selector() and content_frame() may both return None; assert
        # explicitly so a regression fails with a readable message instead of
        # an AttributeError on NoneType.
        handle = page.query_selector("iframe#ifr_plain")
        assert handle is not None, "'iframe#ifr_plain' not found in DOM"
        cf = handle.content_frame()
        assert cf is not None, "'iframe#ifr_plain': content_frame() returned None"
        assert cf.evaluate("() => document.title") == "clicked"
|
||||
171
tests/test_detectors_e2e.py
Normal file
171
tests/test_detectors_e2e.py
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
"""E2E: run the REAL open-source detectors against the patched binary, on CI.
|
||||
|
||||
Instead of our own hand-rolled signal checks, this loads the actual detection
|
||||
libraries and uses their FULL API surface:
|
||||
|
||||
* BotD (@fingerprintjs/botd, MIT) — the client-side bot detector that
|
||||
FingerprintJS Pro itself uses. We assert the aggregate verdict
|
||||
(``detect().bot == False``) AND every one of its ~18 individual detectors
|
||||
(``getDetections()``) returns ``bot == False``. The per-detector view is
|
||||
why we could delete our hand-rolled ``test_botd_*`` mirrors — the real
|
||||
library now covers each detector, with the same granularity.
|
||||
* FingerprintJS open-source (MIT) — ``get()`` must return a ``visitorId``
|
||||
that is STABLE across two fresh launches with the same seed (an
|
||||
over-randomized spoof drifts), and a RICH component set (the fingerprint
|
||||
surface is real, not a stub).
|
||||
|
||||
Everything is hermetic: the libraries are vendored (tests/vendor/) and served
|
||||
from a localhost HTTP server — no external CDN call (Firefox tracking-protection
|
||||
blocks the CDN anyway) and no IP/network dependency. Runs identically on a dev
|
||||
box and on a GitHub runner.
|
||||
|
||||
NOT covered: FingerprintJS *Pro* (commercial, server-side, IP/residential
|
||||
analysis) — can't be self-hosted, stays the local realness gate.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import http.server
|
||||
import socketserver
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
_VENDOR = Path(__file__).parent / "vendor"
|
||||
_BOTD = "botd-2.0.0.esm.js"
|
||||
_FPJS = "fingerprintjs-5.2.0.umd.min.js"
|
||||
|
||||
_PAGE = f"""<!doctype html><html><head><meta charset="utf-8">
|
||||
<title>detectors</title>
|
||||
<script src="/{_FPJS}"></script>
|
||||
</head><body><h1 id="state">loading</h1>
|
||||
<script type="module">
|
||||
window.__botd = null; window.__fp = null; window.__err = "";
|
||||
(async () => {{
|
||||
try {{
|
||||
const Botd = await import("/{_BOTD}");
|
||||
const botd = await Botd.load(); // load() collects internally
|
||||
const verdict = botd.detect(); // {{bot:false}} | {{bot:true,botKind}}
|
||||
const raw = botd.getDetections() || {{}}; // per-detector verdicts
|
||||
const detections = {{}};
|
||||
for (const k in raw) detections[k] = {{ bot: raw[k].bot, botKind: raw[k].botKind || null }};
|
||||
window.__botd = {{ bot: verdict.bot, botKind: verdict.botKind || null, detections }};
|
||||
}} catch (e) {{ window.__err += " botd:" + e; }}
|
||||
try {{
|
||||
const fp = await FingerprintJS.load();
|
||||
const r = await fp.get();
|
||||
const keys = Object.keys(r.components || {{}});
|
||||
const errored = keys.filter(k => r.components[k] && "error" in r.components[k]);
|
||||
window.__fp = {{ visitorId: r.visitorId, componentKeys: keys, erroredComponents: errored }};
|
||||
}} catch (e) {{ window.__err += " fp:" + e; }}
|
||||
document.getElementById("state").textContent = "done";
|
||||
}})();
|
||||
</script></body></html>"""
|
||||
|
||||
|
||||
class _DetectorSite:
    """Localhost server: `/` → the page; `/<lib>` → the vendored bundle.

    The server binds to an ephemeral port on 127.0.0.1 and serves requests
    from a daemon thread until :meth:`close` is called.
    """

    def __init__(self):
        page = _PAGE.encode()
        vendor = _VENDOR

        class H(http.server.BaseHTTPRequestHandler):
            def do_GET(self):  # noqa: N802
                if self.path == "/" or self.path.startswith("/?"):
                    body, ctype = page, "text/html; charset=utf-8"
                else:
                    # Only the final path component is used, so a request
                    # cannot escape the vendor directory.
                    f = vendor / Path(self.path.lstrip("/")).name
                    if not f.is_file():
                        self.send_error(404)
                        return
                    body = f.read_bytes()
                    ctype = "text/javascript; charset=utf-8"
                self.send_response(200)
                self.send_header("Content-Type", ctype)
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)

            def log_message(self, *a):
                # Silence the default per-request stderr logging.
                pass

        # Port 0 lets the OS pick a free port; read it back for `url`.
        self._srv = socketserver.TCPServer(("127.0.0.1", 0), H)
        self.port = self._srv.server_address[1]
        self._thread = threading.Thread(target=self._srv.serve_forever, daemon=True)
        self._thread.start()

    @property
    def url(self):
        """Root URL of the running server."""
        return f"http://127.0.0.1:{self.port}/"

    def close(self):
        """Stop serving and release the listening socket.

        ``shutdown()`` only stops the ``serve_forever`` loop; the socket stays
        open until ``server_close()`` is called, so both are needed.
        """
        self._srv.shutdown()
        self._srv.server_close()
        self._thread.join(timeout=5)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def detector_site():
    """Module-scoped fixture: start a ``_DetectorSite`` and close it on teardown."""
    site = _DetectorSite()
    yield site
    site.close()
|
||||
|
||||
|
||||
def _run_detectors(firefox_binary, url):
    """Launch the binary, open ``url`` and return ``(botd, fp, err)``.

    The three values are read from ``window.__botd``, ``window.__fp`` and
    ``window.__err`` once the page's ``#state`` element reads ``done``.
    """
    page_finished = "() => document.getElementById('state').textContent === 'done'"
    with InvisiblePlaywright(seed=42, binary_path=firefox_binary) as browser:
        page = browser.new_page()
        page.goto(url, wait_until="load", timeout=45000)
        page.wait_for_function(page_finished, timeout=45000)
        botd_result = page.evaluate("() => window.__botd")
        fp_result = page.evaluate("() => window.__fp")
        error_text = page.evaluate("() => window.__err")
    return botd_result, fp_result, error_text
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_botd_no_detector_flags_automation(firefox_binary, detector_site):
    """BotD must stay quiet: both its aggregate verdict and each individual
    detector (webDriver/userAgent/appVersion/plugins/process/...) must report
    ``bot == False``.
    """
    botd, _fp, err = _run_detectors(firefox_binary, detector_site.url)

    assert botd is not None, f"BotD produced no result (err:{err!r})"
    assert botd.get("bot") is False, (
        f"BotD aggregate flagged a bot: botKind={botd.get('botKind')!r}"
    )

    detections = botd.get("detections") or {}
    assert detections, f"BotD getDetections() returned nothing (err:{err!r})"

    # Collect every detector that fired, keyed by detector name.
    flagged = {}
    for detector_name, result in detections.items():
        if result.get("bot"):
            flagged[detector_name] = result.get("botKind")
    assert not flagged, f"BotD individual detectors flagged automation: {flagged}"
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fingerprintjs_visitorid_stable_across_launches(firefox_binary, detector_site):
    """Two fresh launches with the same seed must yield the same, non-empty
    FingerprintJS visitorId — a drifting fingerprint is itself a bot tell."""
    # Run both launches up front, then validate each result.
    first_run = _run_detectors(firefox_binary, detector_site.url)
    second_run = _run_detectors(firefox_binary, detector_site.url)
    _b1, fp1, err1 = first_run
    _b2, fp2, err2 = second_run

    assert fp1 and fp1.get("visitorId"), f"no visitorId on run 1 (err:{err1!r})"
    assert fp2 and fp2.get("visitorId"), f"no visitorId on run 2 (err:{err2!r})"
    assert fp1["visitorId"] == fp2["visitorId"], (
        f"FingerprintJS visitorId drifted across launches: "
        f"{fp1['visitorId']!r} != {fp2['visitorId']!r} (per-session entropy = bot tell)"
    )
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fingerprintjs_collects_rich_fingerprint(firefox_binary, detector_site):
    """FingerprintJS must compute an id and collect at least 15 components.

    Errored components are deliberately not asserted on (some are legitimately
    unsupported per browser); only the size of the surface is checked."""
    _b, fp, err = _run_detectors(firefox_binary, detector_site.url)
    assert fp and fp.get("visitorId"), f"FingerprintJS produced no id (err:{err!r})"

    keys = fp.get("componentKeys") or []
    component_count = len(keys)
    assert component_count >= 15, (
        f"FingerprintJS collected only {len(keys)} components — surface too thin "
        f"(suppressed signals are themselves a tell): {keys}"
    )
|
||||
|
|
@ -4,15 +4,20 @@ import tarfile
|
|||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import responses
|
||||
|
||||
from invisible_playwright.constants import BINARY_VERSION
|
||||
from invisible_playwright.constants import BINARY_VERSION, RELEASE_URL_TEMPLATE
|
||||
from invisible_playwright.download import (
|
||||
_download_file,
|
||||
_extract,
|
||||
_github_token,
|
||||
_parse_checksums,
|
||||
_parse_owner_repo,
|
||||
_resolve_asset_url,
|
||||
_sha256_file,
|
||||
cache_dir_for_version,
|
||||
cache_root,
|
||||
ensure_binary,
|
||||
)
|
||||
|
||||
|
|
@ -161,6 +166,166 @@ def test_parse_checksums_uses_last_token_as_filename():
|
|||
assert "some/nested/file.zip" in out
|
||||
|
||||
|
||||
# DL3 regression — issue #15 (LostBoxArt).
|
||||
# GNU coreutils `sha256sum` (and `shasum -b`) print filenames in BINARY MODE
|
||||
# with a leading `*`: "hash *filename". The parser used parts[-1] verbatim
|
||||
# so the key became "*filename" and lookups by bare filename returned None,
|
||||
# raising `RuntimeError: no SHA256 for {asset}` on every first-time fetch.
|
||||
@pytest.mark.unit
def test_parse_checksums_strips_star_prefix_binary_mode():
    """A `sha256sum -b` style line (``hash *filename``) yields a key without the `*`."""
    parsed = _parse_checksums("abc123 *firefox.tar.gz\n")
    expected = {"firefox.tar.gz": "abc123"}
    assert parsed == expected, (
        "binary-mode '*' prefix must be stripped from the filename key"
    )
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_handles_mixed_binary_and_text_mode():
    """One checksums.txt mixing a binary-mode line with a text-mode line.

    Both keys must come out without a `*` prefix so the bare filename works
    as the lookup key however each line was produced."""
    lines = [
        "aaa111 *firefox-win.zip\n",
        "bbb222 firefox-linux.tar.gz\n",
    ]
    parsed = _parse_checksums("".join(lines))
    assert parsed == {"firefox-win.zip": "aaa111", "firefox-linux.tar.gz": "bbb222"}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_handles_multiple_leading_stars():
    """A run of leading asterisks must be removed entirely (`.lstrip("*")`).

    Not a real sha256sum format; this is a defensive check that no `*`
    survives in any key."""
    parsed = _parse_checksums("abc123 **doubled.zip\n")
    assert "doubled.zip" in parsed
    assert "**doubled.zip" not in parsed
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_handles_crlf_line_endings():
    """CRLF-terminated lines (as written by sha256sum.exe on Windows) must not
    leave a trailing \\r on the key, e.g. 'firefox.zip\\r'."""
    crlf_listing = "abc123 *firefox.zip\r\ndef456 other.tar.gz\r\n"
    parsed = _parse_checksums(crlf_listing)
    assert parsed == {"firefox.zip": "abc123", "other.tar.gz": "def456"}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_handles_utf8_bom_at_start():
    """Some Windows tools prepend a UTF-8 BOM. The first line shouldn't be lost."""
    # The BOM is written as an explicit escape: a literal U+FEFF is invisible
    # in editors and diffs and is easily dropped, which would silently turn
    # this into a plain binary-mode test.
    text = "\ufeffabc123 *firefox.zip\n"
    out = _parse_checksums(text)
    # The BOM stays attached to the hash field as a non-fatal artifact;
    # what matters is that the FILENAME key is parsed and normalized.
    keys = list(out.keys())
    assert "firefox.zip" in keys, f"BOM caused first line to be lost: keys={keys}"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_handles_indented_lines():
    """A data line that starts with whitespace must still parse."""
    parsed = _parse_checksums(" abc123 *indented.zip\n")
    assert parsed == {"indented.zip": "abc123"}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_handles_trailing_whitespace():
    """Whitespace after the filename must not become part of the key."""
    parsed = _parse_checksums("abc123 *trailing.zip \n")
    # The expected key carries no trailing space.
    assert parsed == {"trailing.zip": "abc123"}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_real_world_sha256sum_b_output(tmp_path):
    """Feed the parser a synthesized binary-mode `sha256sum` line for a real file.

    `sha256sum` may not be on PATH on Windows, so the line is built by hand
    in the shape "<hash><SP>*<filename>\\n" from the file's actual SHA-256."""
    archive = tmp_path / "release.tar.gz"
    archive.write_bytes(b"some content")
    digest = hashlib.sha256(archive.read_bytes()).hexdigest()

    listing_line = f"{digest} *{archive.name}\n"
    assert _parse_checksums(listing_line) == {"release.tar.gz": digest}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_text_mode_two_space_separator():
    """`sha256sum --text` format uses two spaces. Must also parse cleanly
    and the key must be identical to the binary-mode case."""
    # Two spaces between hash and filename: this is the text-mode separator
    # the test is named after. A single space would not exercise it.
    text = "abc123  textmode.zip\n"
    out = _parse_checksums(text)
    assert out == {"textmode.zip": "abc123"}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_empty_file_returns_empty_dict():
    """Empty, newline-only and whitespace-only inputs all parse to ``{}``."""
    for blank_input in ("", "\n\n\n", " \n\t\n"):
        assert _parse_checksums(blank_input) == {}
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_checksums_all_comment_file_returns_empty_dict():
    """Input made only of `#` comment lines must parse to an empty dict."""
    comment_only = "# generated by release script\n# 2026-05-20\n"
    parsed = _parse_checksums(comment_only)
    assert parsed == {}
|
||||
|
||||
|
||||
# DL3 regression — full integration via ensure_binary: confirm the parser
|
||||
# bug from #15 cannot regress when the live release format is mimicked exactly.
|
||||
@pytest.mark.unit
@responses.activate
def test_ensure_binary_accepts_binary_mode_checksums(tmp_path, monkeypatch):
    """Reproduce the EXACT format the GitHub release ships:
        <sha> *<filename>
    Before the #15 fix this raised
        RuntimeError: no SHA256 for {asset} in checksums.txt
    even though the asset and SHA were both present."""
    import platform

    from invisible_playwright.constants import ARCHIVE_NAME

    cache = tmp_path / "cache"
    monkeypatch.setattr("invisible_playwright.download.cache_root", lambda: cache)

    archive_path = tmp_path / "archive.zip"
    archive_bytes = _make_zip(archive_path, "firefox.exe", b"PEX!")
    archive_sha = hashlib.sha256(archive_bytes).hexdigest()
    asset = ARCHIVE_NAME("win32", "AMD64")

    release_base = (
        "https://github.com/feder-cr/invisible_playwright/releases/download/"
        f"{BINARY_VERSION}"
    )
    url_archive = f"{release_base}/{asset}"
    url_sums = f"{release_base}/checksums.txt"

    responses.add(responses.GET, url_archive, body=archive_bytes, status=200,
                  content_type="application/zip")
    # Binary-mode format (note the `*`): regression sentinel for #15.
    responses.add(
        responses.GET, url_sums,
        body=f"{archive_sha} *{asset}\n",
        status=200,
    )

    # Force the platform branch the test mocks. The mocked asset is built for
    # ("win32", "AMD64"), so the machine is pinned as well as sys.platform.
    # NOTE(review): assumes ensure_binary reads platform.machine(), as the
    # unsupported-platform test in this file implies — confirm.
    monkeypatch.setattr("sys.platform", "win32")
    monkeypatch.setattr(platform, "machine", lambda: "AMD64")
    out = ensure_binary()
    # No RuntimeError means the parser accepted the `*`-prefixed key.
    assert out.exists()
|
||||
|
||||
|
||||
# DL4: unknown archive format (.rar) raises RuntimeError
|
||||
@pytest.mark.unit
|
||||
def test_extract_unknown_format_raises(tmp_path):
|
||||
|
|
@ -253,7 +418,7 @@ def test_github_token_none_when_unset(monkeypatch):
|
|||
# Bonus coverage: unsupported platform raises NotImplementedError before any HTTP
|
||||
@pytest.mark.unit
|
||||
def test_ensure_binary_unsupported_platform_raises(monkeypatch):
|
||||
monkeypatch.setattr("sys.platform", "darwin")
|
||||
monkeypatch.setattr("sys.platform", "freebsd") # win32/linux/darwin are supported
|
||||
import platform
|
||||
monkeypatch.setattr(platform, "machine", lambda: "AMD64")
|
||||
with pytest.raises(NotImplementedError, match="unsupported platform"):
|
||||
|
|
@ -375,3 +540,303 @@ def test_ensure_binary_missing_entry_after_extract_raises_linux(tmp_path, monkey
|
|||
|
||||
with pytest.raises(RuntimeError, match="binary not found after extraction"):
|
||||
ensure_binary()
|
||||
|
||||
|
||||
# ========================================================================== #
|
||||
# _resolve_asset_url — public-repo direct URL vs private-repo API resolution
|
||||
# ========================================================================== #
|
||||
# This function chooses between two code paths based on whether a GitHub
|
||||
# token is set. Both paths produce a downloadable URL but via different
|
||||
# mechanisms, and a regression here would surface as 404 / 403 / wrong
|
||||
# binary downloaded.
|
||||
|
||||
@pytest.mark.unit
def test_resolve_asset_url_public_returns_direct_url(monkeypatch):
    """With no token in the environment the direct releases/download URL is returned."""
    for token_var in ("STEALTHFOX_GITHUB_TOKEN", "GITHUB_TOKEN"):
        monkeypatch.delenv(token_var, raising=False)

    resolved = _resolve_asset_url("firefox-4", "thing.zip")
    expected = RELEASE_URL_TEMPLATE.format(tag="firefox-4", asset="thing.zip")
    assert resolved == expected
    assert "api.github.com" not in resolved  # public path must skip the API
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_asset_url_public_url_format_is_stable(monkeypatch):
    """Pin the literal public URL shape so it cannot drift by accident.

    A changed shape without a BINARY_VERSION bump would 404 on first fetch
    for every existing user."""
    for token_var in ("STEALTHFOX_GITHUB_TOKEN", "GITHUB_TOKEN"):
        monkeypatch.delenv(token_var, raising=False)

    expected = (
        "https://github.com/feder-cr/invisible_playwright/releases/"
        "download/firefox-4/abc.tar.gz"
    )
    assert _resolve_asset_url("firefox-4", "abc.tar.gz") == expected
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_resolve_asset_url_private_uses_api_with_token(monkeypatch):
    """With a token set, the release API is queried and the matching asset's
    ``url`` is returned (which 302s with Accept: application/octet-stream).
    The direct release URL would 404 for a private repo even with the token
    in headers."""
    monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_fake")
    monkeypatch.delenv("GITHUB_TOKEN", raising=False)

    release_assets = [
        {"name": "other.zip", "url": "https://api.github.com/.../1"},
        {"name": "wanted.zip", "url": "https://api.github.com/.../2"},
    ]
    responses.add(
        responses.GET,
        "https://api.github.com/repos/feder-cr/invisible_playwright"
        "/releases/tags/firefox-4",
        json={"assets": release_assets},
        status=200,
    )

    resolved = _resolve_asset_url("firefox-4", "wanted.zip")
    assert resolved == "https://api.github.com/.../2"
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_resolve_asset_url_private_raises_when_asset_missing(monkeypatch):
    """An asset name absent from the release must raise a RuntimeError whose
    message names the asset, rather than resolving to something else."""
    monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_fake")
    responses.add(
        responses.GET,
        "https://api.github.com/repos/feder-cr/invisible_playwright"
        "/releases/tags/firefox-4",
        json={"assets": [{"name": "other.zip", "url": "x"}]},
        status=200,
    )

    with pytest.raises(RuntimeError, match="not-here.zip"):
        _resolve_asset_url("firefox-4", "not-here.zip")
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_resolve_asset_url_private_propagates_api_4xx(monkeypatch):
    """A 4xx from the release API (404 missing release, 401 bad token) must
    surface as ``requests.HTTPError`` instead of being swallowed."""
    monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_fake")
    missing_release_url = (
        "https://api.github.com/repos/feder-cr/invisible_playwright"
        "/releases/tags/firefox-99"
    )
    responses.add(responses.GET, missing_release_url, status=404)

    with pytest.raises(requests.HTTPError):
        _resolve_asset_url("firefox-99", "thing.zip")
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_resolve_asset_url_private_sends_auth_header(monkeypatch):
    """The API request must carry `Authorization: token <ghp_...>`; without it
    a private repo returns 404 and the release looks missing."""
    monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_secret")
    seen_headers = {}

    def record_auth(request):
        # Remember the header, then answer with a one-asset release.
        seen_headers["auth"] = request.headers.get("Authorization")
        return (200, {}, '{"assets":[{"name":"x.zip","url":"https://x/y"}]}')

    responses.add_callback(
        responses.GET,
        "https://api.github.com/repos/feder-cr/invisible_playwright"
        "/releases/tags/firefox-4",
        callback=record_auth,
        content_type="application/json",
    )

    _resolve_asset_url("firefox-4", "x.zip")
    assert seen_headers["auth"] == "token ghp_secret"
|
||||
|
||||
|
||||
# ========================================================================== #
|
||||
# _download_file — file streaming + error propagation
|
||||
# ========================================================================== #
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_download_file_writes_full_payload_to_disk(tmp_path):
    """After a 200 OK the file on disk must equal the response body byte-for-byte."""
    source_url = "https://example.com/some-large.bin"
    body = bytes(range(256)) * 1024  # 256 KB, varied bytes
    responses.add(responses.GET, source_url, body=body, status=200)

    target = tmp_path / "downloaded.bin"
    _download_file(source_url, target)

    assert target.exists()
    assert target.read_bytes() == body
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_download_file_creates_parent_directories(tmp_path):
    """``_download_file`` must create missing parent directories of ``dst``.

    Otherwise the first fetch on a clean machine raises FileNotFoundError
    because the cache dir doesn't exist yet."""
    source_url = "https://example.com/x.bin"
    responses.add(responses.GET, source_url, body=b"data", status=200)

    nested_target = tmp_path.joinpath("a", "b", "c", "x.bin")
    _download_file(source_url, nested_target)

    assert nested_target.exists()
    assert nested_target.read_bytes() == b"data"
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_download_file_propagates_http_404(tmp_path):
    """A 404 must raise ``requests.HTTPError``; a silent 404 leading to an
    empty file and a later SHA mismatch is a much worse failure mode."""
    missing_url = "https://example.com/missing.bin"
    responses.add(responses.GET, missing_url, status=404)

    target = tmp_path / "out.bin"
    with pytest.raises(requests.HTTPError):
        _download_file(missing_url, target)
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_download_file_propagates_http_500(tmp_path):
    """A 500 must raise ``requests.HTTPError`` rather than pass as an empty download."""
    broken_url = "https://example.com/broken.bin"
    responses.add(responses.GET, broken_url, status=500)

    target = tmp_path / "out.bin"
    with pytest.raises(requests.HTTPError):
        _download_file(broken_url, target)
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_download_file_adds_auth_for_api_urls(monkeypatch, tmp_path):
    """Downloads from api.github.com (private-repo flow) must send both
    `Authorization: token <...>` and `Accept: application/octet-stream`;
    otherwise the API returns the asset JSON instead of the binary."""
    monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_secret")
    asset_api_url = "https://api.github.com/repos/x/y/releases/assets/123"
    seen_headers = {}

    def record_headers(request):
        # Capture the two headers under test, then return a tiny body.
        for key, header in (("auth", "Authorization"), ("accept", "Accept")):
            seen_headers[key] = request.headers.get(header)
        return (200, {}, b"BIN!")

    responses.add_callback(responses.GET, asset_api_url, callback=record_headers)

    _download_file(asset_api_url, tmp_path / "out.bin")
    assert seen_headers["auth"] == "token ghp_secret"
    assert seen_headers["accept"] == "application/octet-stream"
|
||||
|
||||
|
||||
@pytest.mark.unit
@responses.activate
def test_download_file_does_not_send_auth_for_non_api_urls(monkeypatch, tmp_path):
    """The public flow hits github.com/.../releases/download/... directly and
    must not attach an Authorization header, even when a token is set —
    sending it is unnecessary and would leak the token in CDN access logs."""
    monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_secret")
    public_url = "https://github.com/feder-cr/invisible_playwright/releases/download/firefox-4/x.zip"
    seen_headers = {}

    def record_auth(request):
        seen_headers["auth"] = request.headers.get("Authorization")
        return (200, {}, b"BIN!")

    responses.add_callback(responses.GET, public_url, callback=record_auth)

    _download_file(public_url, tmp_path / "out.bin")
    assert seen_headers["auth"] is None, (
        "Auth header leaked to a public CDN URL — would expose the token "
        "in GitHub's access logs."
    )
|
||||
|
||||
|
||||
# ========================================================================== #
|
||||
# cache_root + cache_dir_for_version — path resolution
|
||||
# ========================================================================== #
|
||||
|
||||
@pytest.mark.unit
def test_cache_root_returns_path():
    """``cache_root()`` must return a ``Path`` (not a str) — downstream code uses .mkdir() etc."""
    assert isinstance(cache_root(), Path)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_cache_root_contains_package_name():
    """The cache path must contain the package name so users can `rm -rf`
    it without touching other tools' caches."""
    lowered_path = str(cache_root()).lower()
    assert "invisible-playwright" in lowered_path
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_cache_dir_for_version_appends_version_segment():
    """The version dir is a direct child of the cache root named after the
    version, so multiple versions can coexist (downgrade / A-B testing)."""
    version_dir = cache_dir_for_version("firefox-99")
    assert version_dir.name == "firefox-99"
    assert version_dir.parent == cache_root()
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_cache_dir_for_version_defaults_to_current_binary_version():
    """Called without arguments, the directory is named after BINARY_VERSION."""
    default_dir = cache_dir_for_version()
    assert default_dir.name == BINARY_VERSION
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_cache_dir_isolation_between_versions():
    """Different versions must map to different directories under one root —
    sharing a directory would let one extraction clobber the other and
    break downgrade."""
    older, newer = (cache_dir_for_version(tag) for tag in ("firefox-3", "firefox-4"))
    assert older != newer
    assert older.parent == newer.parent  # but they share the same root
|
||||
|
||||
|
||||
# ========================================================================== #
|
||||
# _parse_owner_repo — more edge cases
|
||||
# ========================================================================== #
|
||||
|
||||
@pytest.mark.unit
def test_parse_owner_repo_extracts_from_canonical_template():
    """Parsing RELEASE_URL_TEMPLATE itself must yield two non-empty, slash-free segments."""
    owner, repo = _parse_owner_repo(RELEASE_URL_TEMPLATE)
    assert owner and repo  # something extracted
    # Neither segment may contain a path separator.
    assert "/" not in owner and "/" not in repo
|
||||
|
||||
|
||||
@pytest.mark.unit
@pytest.mark.parametrize(
    "bad_template",
    [
        "http://github.com/x/y/releases/",  # http, not https
        "https://gitlab.com/x/y/releases/",  # wrong host
        "https://github.com/onlyone/releases/",  # missing repo segment
        "",  # empty
        "github.com/x/y/releases/",  # missing scheme
    ],
)
def test_parse_owner_repo_rejects_malformed_urls(bad_template):
    """Every URL that deviates from the canonical shape must raise a
    RuntimeError matching "cannot parse" — a silent None/empty extraction
    would build broken API URLs and confuse the user."""
    with pytest.raises(RuntimeError, match="cannot parse"):
        _parse_owner_repo(bad_template)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_parse_owner_repo_handles_repos_with_dashes_and_underscores():
    """Owner/repo names containing -, _ and . (all valid on GitHub) must be extracted intact."""
    template = "https://github.com/my-org/my_cool.repo/releases/download/x/y.zip"
    parsed = _parse_owner_repo(template)
    owner, repo = parsed
    assert owner == "my-org"
    assert repo == "my_cool.repo"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_ensure_binary_refuses_known_broken_version():
    """Requesting the known-broken release (firefox-8, no juggler) must raise a
    RuntimeError matching "known-broken" instead of handing it to the user."""
    broken_tag = "firefox-8"
    with pytest.raises(RuntimeError, match="known-broken"):
        ensure_binary(broken_tag)
|
||||
|
|
|
|||
|
|
@ -8,33 +8,9 @@ handling) do not need a binary and always run.
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def firefox_binary():
    """Return the path of the cached patched Firefox binary, or skip the test.

    No download is triggered here: ``ensure_binary`` would pull a
    multi-hundred-megabyte archive from a private release, which is not
    appropriate inside a unit/E2E test run. Only an already-cached binary
    is used; when none is found the calling test is skipped.
    """
    current_platform = sys.platform
    if current_platform not in BINARY_ENTRY_REL:
        pytest.skip(f"unsupported platform: {current_platform}")

    from invisible_playwright.download import cache_dir_for_version

    binary_path = cache_dir_for_version() / BINARY_ENTRY_REL[current_platform]
    if binary_path.exists():
        return str(binary_path)

    # pytest.skip() raises, so nothing is returned on this path.
    pytest.skip(
        "patched Firefox binary not cached; run `invisible-playwright fetch` "
        "to enable E2E tests"
    )
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
510
tests/test_fingerprint_consistency.py
Normal file
510
tests/test_fingerprint_consistency.py
Normal file
|
|
@ -0,0 +1,510 @@
|
|||
"""Fingerprint consistency / lie-detection tests.
|
||||
|
||||
Complementary to test_fingerprint_surface.py: those tests ask "do you
|
||||
look like a real browser?" — these ask "are your fingerprint surfaces
|
||||
INTERNALLY CONSISTENT?"
|
||||
|
||||
Anti-bot systems catch spoofers not by checking each signal in
|
||||
isolation but by cross-checking related signals. If you spoof UA to
|
||||
"Windows" but leave navigator.platform as "Linux x86_64", or you spoof
|
||||
WebGL renderer in the main thread but not in a Web Worker, the
|
||||
inconsistency proves the spoof is fake.
|
||||
|
||||
Sources studied (all FOSS, MIT-licensed):
|
||||
- creepjs/src/lies/index.ts — the canonical lie detector
|
||||
- creepjs/src/worker/index.ts — main-vs-worker scope cross-check
|
||||
- creepjs/src/math/index.ts — Math.x(p) deterministic equality
|
||||
- creepjs/src/navigator/index.ts — UA/platform/oscpu invariants
|
||||
- niespodd/browser-fingerprinting README — worker hwConcurrency,
|
||||
plugin chain, perf.timeOrigin
|
||||
|
||||
Everything runs against `about:blank` with NO network and NO proxy.
|
||||
|
||||
Run only this file:
|
||||
pytest tests/test_fingerprint_consistency.py -m e2e -v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
|
||||
# Fingerprint values pinned for this module's browser session; the `page`
# fixture passes this dict as `pin=` to InvisiblePlaywright.
PIN = {
    "screen.width": 1920,
    "screen.height": 1080,
    "screen.avail_width": 1920,
    # Smaller than screen.height — presumably models a taskbar; TODO confirm.
    "screen.avail_height": 1040,
    "screen.dpr": 1.0,
    "hardware.concurrency": 8,
    "audio.sample_rate": 48000,
    "audio.max_channel_count": 2,
}
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def page(firefox_binary):
    """Yield one headless page on `about:blank`, shared by the whole module.

    The browser is launched with a fixed seed and the module-level PIN and
    stays open until every test in the module has run.
    """
    launch_options = dict(
        seed=42,
        pin=PIN,
        binary_path=firefox_binary,
        headless=True,
    )
    with InvisiblePlaywright(**launch_options) as browser:
        context = browser.new_context()
        blank_page = context.new_page()
        blank_page.goto("about:blank", timeout=30_000)
        yield blank_page
|
||||
|
||||
|
||||
def _ev(page, expr):
|
||||
return page.evaluate(expr)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 1. Math determinism — same input MUST yield same output
|
||||
# Source: creepjs/src/math/index.ts
|
||||
# A wrapper that adds noise to Math.* (canvas-spoofing prefs) exposes
|
||||
# itself here: two consecutive calls with the same input must be
|
||||
# byte-identical.
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.parametrize("fn,arg", [
    ("cos", "1e308"),
    ("acos", "0.5"),
    ("asin", "0.5"),
    ("atan", "Math.PI"),
    ("atanh", "0.5"),
    ("cbrt", "Math.PI"),
    ("cosh", "Math.PI"),
    ("exp", "Math.PI"),
    ("expm1", "Math.PI"),
    ("log", "Math.PI"),
    ("log1p", "Math.PI"),
    ("log10", "Math.PI"),
    ("sin", "Math.PI"),
    ("sinh", "Math.PI"),
    ("sqrt", "Math.PI"),
    ("tan", "Math.PI"),
    ("tanh", "Math.PI"),
])
def test_math_determinism(page, fn, arg):
    """Math.<fn>(<arg>) must give an identical value on each of 100 calls.

    Values are compared with Object.is, so a stable NaN counts as equal.
    """
    script = f"""() => {{
        const r = [];
        for (let i = 0; i < 100; i++) r.push(Math.{fn}({arg}));
        return [r[0], r[99], r.every(x => Object.is(x, r[0]))];
    }}"""
    first, last, all_equal = _ev(page, script)
    drift_report = (
        f"Math.{fn}({arg}) drifts across calls: first={first}, last={last}"
    )
    assert all_equal, drift_report
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_math_pow_two_arg_determinism(page):
    """Math.pow(Math.PI, 2) must match its first result on 50 repeat calls."""
    script = """() => {
        const a = Math.pow(Math.PI, 2);
        for (let i = 0; i < 50; i++) {
            if (!Object.is(Math.pow(Math.PI, 2), a)) return false;
        }
        return true;
    }"""
    stable = _ev(page, script)
    assert stable
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 2. Worker scope vs main thread — navigator properties MUST agree
|
||||
# Source: creepjs/src/worker/index.ts
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def _worker_navigator_dict(page, props):
|
||||
expr = """async (props) => {
|
||||
const code = `
|
||||
self.onmessage = (e) => {
|
||||
const out = {};
|
||||
for (const p of e.data) {
|
||||
try { out[p] = self.navigator[p]; }
|
||||
catch (err) { out[p] = '<error: ' + err.message + '>'; }
|
||||
}
|
||||
if (out.languages && Array.isArray(out.languages)) {
|
||||
out.languages = [...out.languages];
|
||||
}
|
||||
self.postMessage(out);
|
||||
};
|
||||
`;
|
||||
const blob = new Blob([code], { type: 'application/javascript' });
|
||||
const url = URL.createObjectURL(blob);
|
||||
const worker = new Worker(url);
|
||||
try {
|
||||
const result = await new Promise((resolve, reject) => {
|
||||
worker.onmessage = (e) => resolve(e.data);
|
||||
worker.onerror = (e) => reject(new Error(e.message));
|
||||
worker.postMessage(props);
|
||||
setTimeout(() => reject(new Error('worker timeout')), 5000);
|
||||
});
|
||||
return result;
|
||||
} finally {
|
||||
worker.terminate();
|
||||
URL.revokeObjectURL(url);
|
||||
}
|
||||
}"""
|
||||
return page.evaluate(expr, list(props))
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_worker_userAgent_matches_main(page):
    """navigator.userAgent must be the same string in a worker and on the page."""
    on_page = _ev(page, "navigator.userAgent")
    in_worker = _worker_navigator_dict(page, ("userAgent",))["userAgent"]
    mismatch = (
        f"UA drift main vs worker:\n main: {on_page!r}\n worker: {in_worker!r}"
    )
    assert in_worker == on_page, mismatch
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_worker_hardwareConcurrency_matches_main(page):
    """navigator.hardwareConcurrency must agree between worker and page."""
    on_page = _ev(page, "navigator.hardwareConcurrency")
    reply = _worker_navigator_dict(page, ("hardwareConcurrency",))
    in_worker = reply["hardwareConcurrency"]
    assert in_worker == on_page
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_worker_language_matches_main(page):
    """navigator.language must agree between worker and page."""
    on_page = _ev(page, "navigator.language")
    reply = _worker_navigator_dict(page, ("language",))
    in_worker = reply["language"]
    assert in_worker == on_page
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_worker_languages_matches_main(page):
    """navigator.languages must list the same entries, in order, in both scopes."""
    on_page = _ev(page, "[...navigator.languages]")
    reply = _worker_navigator_dict(page, ("languages",))
    in_worker = list(reply["languages"])
    assert in_worker == list(on_page)
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_worker_platform_matches_main(page):
    """navigator.platform must agree between worker and page."""
    on_page = _ev(page, "navigator.platform")
    reply = _worker_navigator_dict(page, ("platform",))
    in_worker = reply["platform"]
    assert in_worker == on_page
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 3. Iframe scope vs window scope
|
||||
# Source: creepjs/src/lies/index.ts (getBehemothIframe pattern)
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def _iframe_navigator_dict(page, props):
|
||||
expr = """(props) => {
|
||||
const iframe = document.createElement('iframe');
|
||||
iframe.style.display = 'none';
|
||||
document.body.appendChild(iframe);
|
||||
const out = {};
|
||||
for (const p of props) {
|
||||
try { out[p] = iframe.contentWindow.navigator[p]; }
|
||||
catch (e) { out[p] = '<error: ' + e.message + '>'; }
|
||||
}
|
||||
if (Array.isArray(out.languages)) out.languages = [...out.languages];
|
||||
document.body.removeChild(iframe);
|
||||
return out;
|
||||
}"""
|
||||
return page.evaluate(expr, list(props))
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_iframe_userAgent_matches_window(page):
    """navigator.userAgent inside a new iframe must equal the top window's."""
    top_level = _ev(page, "navigator.userAgent")
    framed = _iframe_navigator_dict(page, ("userAgent",))["userAgent"]
    assert framed == top_level
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_iframe_language_matches_window(page):
    """navigator.language inside a new iframe must equal the top window's."""
    top_level = _ev(page, "navigator.language")
    framed = _iframe_navigator_dict(page, ("language",))["language"]
    assert framed == top_level
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_iframe_hardwareConcurrency_matches_window(page):
    """hardwareConcurrency inside a new iframe must equal the top window's."""
    top_level = _ev(page, "navigator.hardwareConcurrency")
    reply = _iframe_navigator_dict(page, ("hardwareConcurrency",))
    framed = reply["hardwareConcurrency"]
    assert framed == top_level
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_iframe_screen_matches_window(page):
    """screen.width/height read via a new iframe must equal the window's."""
    top_level = _ev(page, "[screen.width, screen.height]")
    iframe_probe = """() => {
        const f = document.createElement('iframe');
        f.style.display = 'none';
        document.body.appendChild(f);
        const v = [f.contentWindow.screen.width, f.contentWindow.screen.height];
        document.body.removeChild(f);
        return v;
    }"""
    framed = _ev(page, iframe_probe)
    assert framed == top_level
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 4. UA self-consistency (creepjs/src/navigator/index.ts)
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_navigator_platform_matches_userAgent_OS(page):
    """The OS named in the user agent must agree with navigator.platform.

    Windows UA -> platform contains "Win"; Mac UA -> "Mac"; Linux/X11 UA ->
    "Linux" or "X11". A UA with none of these tokens is reported as skipped
    rather than passing with no assertion executed.
    """
    ua = _ev(page, "navigator.userAgent")
    platform = _ev(page, "navigator.platform")
    detail = f"UA={ua!r} platform={platform!r}"
    if "Windows" in ua:
        assert "Win" in platform, detail
    elif "Mac" in ua:
        assert "Mac" in platform, detail
    elif "Linux" in ua or "X11" in ua:
        assert "Linux" in platform or "X11" in platform, detail
    else:
        # Nothing to cross-check: surface that instead of a silent green.
        pytest.skip(f"no recognised OS token in UA: {ua!r}")
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_navigator_oscpu_matches_userAgent(page):
    """Firefox-only: navigator.oscpu must name the same OS as the user agent.

    Skipped when oscpu is not exposed, and also when the UA carries no
    recognised OS token (previously that case passed without asserting).
    """
    ua = _ev(page, "navigator.userAgent")
    oscpu = _ev(page, "navigator.oscpu || ''")
    if not oscpu:
        pytest.skip("navigator.oscpu not exposed")
    detail = f"UA={ua!r} oscpu={oscpu!r}"
    if "Windows" in ua:
        assert "Windows" in oscpu, detail
    elif "Linux" in ua:
        assert "Linux" in oscpu, detail
    elif "Mac" in ua:
        assert "Mac" in oscpu, detail
    else:
        pytest.skip(f"no recognised OS token in UA: {ua!r}")
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 5. Native function self-toString (creepjs/src/lies/index.ts hasKnownToString)
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def _is_native_toString(text, fn_name):
|
||||
"""Mirror of CreepJS hasKnownToString — accept the engine-specific
|
||||
native patterns (single-line on V8, multi-line on SpiderMonkey)."""
|
||||
import re as _re
|
||||
name = _re.escape(fn_name)
|
||||
patterns = [
|
||||
rf"^function {name}\(\) \{{ \[native code\] \}}$",
|
||||
rf"^function get {name}\(\) \{{ \[native code\] \}}$",
|
||||
rf"^function {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$",
|
||||
rf"^function get {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$",
|
||||
]
|
||||
return any(_re.match(p, text) for p in patterns)
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.parametrize("native_fn,name", [
    ("Function.prototype.toString", "toString"),
    ("Function.prototype.bind", "bind"),
    ("Function.prototype.call", "call"),
    ("Function.prototype.apply", "apply"),
    ("Object.getOwnPropertyDescriptor", "getOwnPropertyDescriptor"),
    ("Object.defineProperty", "defineProperty"),
    ("Array.prototype.slice", "slice"),
    ("JSON.stringify", "stringify"),
])
def test_native_function_self_toString_matches(page, native_fn, name):
    """`<native_fn>.toString()` must have the native-code source shape.

    The text is checked with `_is_native_toString` against `name`.
    """
    source_text = _ev(page, f"{native_fn}.toString()")
    looks_native = _is_native_toString(source_text, name)
    assert looks_native, (
        f"{native_fn}.toString() not native-shape: {source_text!r}"
    )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 6. AudioContext / WebGL determinism
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_audio_offline_context_deterministic(page):
    """Rendering the same OfflineAudioContext graph twice must give equal output.

    The first 50 samples of channel 0 from two renders are compared.
    """
    script = """async () => {
        async function render() {
            const ctx = new (window.OfflineAudioContext ||
                window.webkitOfflineAudioContext)(1, 5000, 44100);
            const osc = ctx.createOscillator();
            osc.connect(ctx.destination);
            osc.start(0);
            const buf = await ctx.startRendering();
            return Array.from(buf.getChannelData(0).slice(0, 50));
        }
        const a = await render();
        const b = await render();
        return JSON.stringify(a) === JSON.stringify(b);
    }"""
    identical = _ev(page, script)
    assert identical
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_webgl_getParameter_deterministic(page):
    """Four WebGL limits must read the same on 50 repeated getParameter calls.

    The script also returns false when no 'webgl' context is available.
    """
    script = """() => {
        const c = document.createElement('canvas');
        const gl = c.getContext('webgl');
        if (!gl) return false;
        const params = [gl.MAX_TEXTURE_SIZE, gl.MAX_VIEWPORT_DIMS,
                        gl.MAX_RENDERBUFFER_SIZE, gl.MAX_VERTEX_ATTRIBS];
        const ref = JSON.stringify(params.map(p => gl.getParameter(p)));
        for (let i = 0; i < 50; i++) {
            if (JSON.stringify(params.map(p => gl.getParameter(p))) !== ref) {
                return false;
            }
        }
        return true;
    }"""
    stable = _ev(page, script)
    assert stable
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 7. Locale ↔ Intl cross-consistency
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_navigator_language_matches_Intl_locale(page):
    """Base language of navigator.language must equal Intl.DateTimeFormat's."""
    nav_base = _ev(page, "navigator.language").partition("-")[0]
    resolved = _ev(page, "Intl.DateTimeFormat().resolvedOptions().locale")
    intl_base = resolved.partition("-")[0]
    mismatch = f"navigator.language base={nav_base!r} vs Intl={intl_base!r}"
    assert nav_base == intl_base, mismatch
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_navigator_language_matches_Intl_NumberFormat(page):
    """Base language of navigator.language must equal Intl.NumberFormat's."""
    nav_base = _ev(page, "navigator.language").partition("-")[0]
    resolved = _ev(page, "Intl.NumberFormat().resolvedOptions().locale")
    number_base = resolved.partition("-")[0]
    assert nav_base == number_base
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_navigator_language_matches_Intl_Collator(page):
    """Base language of navigator.language must equal Intl.Collator's."""
    nav_base = _ev(page, "navigator.language").partition("-")[0]
    resolved = _ev(page, "(new Intl.Collator()).resolvedOptions().locale")
    collator_base = resolved.partition("-")[0]
    assert nav_base == collator_base
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 8. Property descriptor shape lies
|
||||
# Spoofers using Object.defineProperty(navigator, prop, {value: ...})
|
||||
# leave a 'value' field on the descriptor — real native props use a getter.
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
# navigator property names fed to the parametrized descriptor-shape test.
# A name the running browser does not expose is skipped by that test.
_DESCRIPTOR_NATIVE_PROPS = [
    "userAgent", "platform", "hardwareConcurrency", "language", "languages",
    "vendor", "appVersion", "appName", "appCodeName", "doNotTrack",
    "cookieEnabled", "onLine", "product", "productSub", "buildID", "oscpu",
]
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.parametrize("prop", _DESCRIPTOR_NATIVE_PROPS)
def test_navigator_property_descriptor_is_getter_not_value(page, prop):
    """navigator.<prop> must not be a data property.

    The script walks from `navigator` up its prototype chain to the first
    own descriptor for `prop` and reports whether it has a 'value' field.
    A 'value' field is what Object.defineProperty(..., {value: x}) leaves
    behind. The test skips when no descriptor is found.
    """
    script = f"""() => {{
        let proto = navigator;
        let descriptor = null;
        while (proto && !descriptor) {{
            descriptor = Object.getOwnPropertyDescriptor(proto, {prop!r});
            proto = Object.getPrototypeOf(proto);
        }}
        if (!descriptor) return null;
        return 'value' in descriptor;
    }}"""
    has_lie = _ev(page, script)
    if has_lie is None:
        pytest.skip(f"navigator.{prop} not exposed")
    complaint = f"navigator.{prop} descriptor exposes 'value' field — lazy spoof"
    assert has_lie is False, complaint
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 9. performance.timeOrigin + monotonic
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_performance_timeOrigin_stable(page):
    """Two reads of performance.timeOrigin in one expression must be equal."""
    same_on_reread = _ev(
        page, "performance.timeOrigin === performance.timeOrigin"
    )
    assert same_on_reread
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_performance_now_monotonic(page):
    """performance.now() must never decrease over 100 consecutive reads."""
    script = """() => {
        let prev = performance.now();
        for (let i = 0; i < 100; i++) {
            const cur = performance.now();
            if (cur < prev) return false;
            prev = cur;
        }
        return true;
    }"""
    never_went_back = _ev(page, script)
    assert never_went_back
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 10. Window dimension invariants
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_window_inner_not_larger_than_outer(page):
    """window.innerWidth must not exceed window.outerWidth."""
    widths = _ev(page, "[window.innerWidth, window.outerWidth]")
    inner_width, outer_width = widths
    assert inner_width <= outer_width
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_screen_avail_not_larger_than_screen(page):
    """screen.availWidth/availHeight must not exceed screen.width/height."""
    avail_w, full_w = _ev(page, "[screen.availWidth, screen.width]")
    avail_h, full_h = _ev(page, "[screen.availHeight, screen.height]")
    assert avail_w <= full_w and avail_h <= full_h
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 11. Firefox UA invariants
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_firefox_UA_implies_empty_vendor(page):
    """With a Firefox user agent, navigator.vendor must be the empty string.

    Skips for a non-Firefox UA and for a UA that also mentions Chrome.
    """
    def current_ua():
        return _ev(page, "navigator.userAgent")

    if "Firefox" not in current_ua():
        pytest.skip("Firefox-only invariant")
    if "Chrome" in current_ua():
        pytest.skip("Chrome+Firefox UA — likely synthetic")
    vendor = _ev(page, "navigator.vendor")
    assert vendor == ""
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_firefox_appVersion_short_form(page):
    """With a Firefox UA, appVersion must start with '5.0 (' and be shorter than the UA."""
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    app_version = _ev(page, "navigator.appVersion")
    user_agent = _ev(page, "navigator.userAgent")
    assert app_version.startswith("5.0 (")
    assert len(app_version) < len(user_agent)
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_firefox_UA_implies_appName_Netscape(page):
    """With a Firefox UA, navigator.appName must be 'Netscape'."""
    user_agent = _ev(page, "navigator.userAgent")
    if "Firefox" not in user_agent:
        pytest.skip("Firefox-only invariant")
    app_name = _ev(page, "navigator.appName")
    assert app_name == "Netscape"
|
||||
238
tests/test_fingerprint_surface.py
Normal file
238
tests/test_fingerprint_surface.py
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
"""Fingerprint surface tests — replicate the checks performed by the canonical
|
||||
anti-bot detection libraries against an OFFLINE browser session.
|
||||
|
||||
Each test asserts the SAME thing the upstream detector would flag. A pass
|
||||
here means our patched build appears human to that detector; a fail
|
||||
means a real stealth hole that anti-bot kits would exploit in production.
|
||||
|
||||
Detector libraries studied (all FOSS, MIT-licensed):
|
||||
- github.com/fingerprintjs/BotD — 19 detectors, the most
|
||||
widely deployed client-side
|
||||
bot detector
|
||||
- github.com/abrahamjuliot/creepjs — headless / stealth / lies
|
||||
modules
|
||||
- github.com/fingerprintjs/fingerprintjs — canvas / audio / color /
|
||||
touch consistency
|
||||
- github.com/antoinevastel/fpscanner — UA / platform / oscpu
|
||||
cross-checks
|
||||
- bot.sannysoft.com — classic Puppeteer harness
|
||||
|
||||
Everything runs against `about:blank` with NO network and NO proxy. The
|
||||
suite is intended to be part of the release-gate: pre-push hook runs
|
||||
`pytest -m e2e` and these tests must be green on every release.
|
||||
|
||||
Run only this file:
|
||||
pytest tests/test_fingerprint_surface.py -m e2e -v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# Inline PIN — a coherent mid-range Windows desktop. Not user-config:
|
||||
# these specific values are what the surface tests assert against.
|
||||
# Keep PIN small (only fields that JS exposes) and stable across runs.
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Passed as `pin=` to InvisiblePlaywright by the `page` fixture; the
# PIN-locked tests in this module compare live JS values against these.
PIN = {
    "screen.width": 1920,
    "screen.height": 1080,
    "screen.avail_width": 1920,
    # Smaller than screen.height — presumably models a taskbar; TODO confirm.
    "screen.avail_height": 1040,
    "screen.dpr": 1.0,
    "hardware.concurrency": 8,
    "audio.sample_rate": 48000,
    "audio.max_channel_count": 2,
}
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def page(firefox_binary):
    """Yield a single headless `about:blank` page reused by every test here.

    Browser startup (~20s) is paid once per module, after which each test
    runs in roughly 50ms.
    """
    launch_options = dict(
        seed=42,
        pin=PIN,
        binary_path=firefox_binary,
        headless=True,
    )
    with InvisiblePlaywright(**launch_options) as browser:
        context = browser.new_context()
        blank_page = context.new_page()
        blank_page.goto("about:blank", timeout=30_000)
        yield blank_page
|
||||
|
||||
|
||||
def _ev(page, expr):
|
||||
return page.evaluate(expr)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# sannysoft.com — classic Puppeteer detection harness
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_sannysoft_chrome_object_consistency(page):
    """Firefox UA + window.chrome present = bot-framework leak.

    Skipped for a non-Firefox UA, matching the other Firefox-only checks in
    this file, so an inapplicable run is not reported as a pass.
    """
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    chrome_defined = _ev(page, "typeof window.chrome !== 'undefined'")
    assert not chrome_defined, "window.chrome is defined under a Firefox UA"
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_sannysoft_permissions_query_works(page):
    """navigator.permissions.query({name: 'notifications'}) must resolve to
    an object with a string `state`; a missing API or a throw counts as failure."""
    script = """async () => {
        if (!navigator.permissions || !navigator.permissions.query) return false;
        try {
            const r = await navigator.permissions.query({name: 'notifications'});
            return r && typeof r.state === 'string';
        } catch (e) { return false; }
    }"""
    query_ok = _ev(page, script)
    assert query_ok
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_sannysoft_iframe_chrome_not_leaked(page):
    """Under a Firefox UA, a new iframe's contentWindow must not define `chrome`."""
    user_agent = _ev(page, "navigator.userAgent")
    if "Firefox" not in user_agent:
        pytest.skip("Firefox-only invariant")
    iframe_probe = """() => {
        const iframe = document.createElement('iframe');
        iframe.style.display = 'none';
        document.body.appendChild(iframe);
        const is = typeof iframe.contentWindow.chrome !== 'undefined';
        document.body.removeChild(iframe);
        return is;
    }"""
    chrome_visible = _ev(page, iframe_probe)
    assert not chrome_visible
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_sannysoft_iframe_languages_not_empty(page):
    """navigator.languages read through a new iframe must have at least one entry."""
    iframe_probe = """() => {
        const f = document.createElement('iframe');
        f.style.display = 'none';
        document.body.appendChild(f);
        const len = f.contentWindow.navigator.languages.length;
        document.body.removeChild(f);
        return len;
    }"""
    language_count = _ev(page, iframe_probe)
    assert language_count > 0
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# FingerprintJS — fingerprint surface coherence
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fpjs_canvas_2d_context_returns_valid(page):
    """A 100x100 canvas must give a 2d context and export a PNG data URL
    longer than 100 characters after drawing text."""
    script = """() => {
        const c = document.createElement('canvas');
        c.width = 100; c.height = 100;
        const ctx = c.getContext('2d');
        if (!ctx) return false;
        ctx.fillText('test', 10, 10);
        const data = c.toDataURL();
        return data.length > 100 && data.startsWith('data:image/png;base64');
    }"""
    canvas_ok = _ev(page, script)
    assert canvas_ok
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fpjs_audio_context_works(page):
    """An OfflineAudioContext with one oscillator must render a non-empty buffer;
    any exception in the page counts as failure."""
    script = """async () => {
        try {
            const ctx = new (window.OfflineAudioContext ||
                window.webkitOfflineAudioContext)(1, 5000, 44100);
            const osc = ctx.createOscillator();
            osc.connect(ctx.destination);
            osc.start(0);
            const buf = await ctx.startRendering();
            return buf && buf.length > 0;
        } catch (e) { return false; }
    }"""
    rendered = _ev(page, script)
    assert rendered
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fpjs_color_gamut_query_works(page):
    """At least one of the srgb / p3 / rec2020 color-gamut media queries must match."""
    gamuts = ("srgb", "p3", "rec2020")
    # One `matchMedia(...).matches` term per gamut, OR-ed in the same order.
    expression = " ||\n".join(
        f"matchMedia('(color-gamut: {gamut})').matches" for gamut in gamuts
    )
    any_gamut_matches = _ev(page, expression)
    assert any_gamut_matches
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fpjs_screen_color_depth_realistic(page):
    """screen.colorDepth must be one of 24, 30 or 32."""
    accepted_depths = (24, 30, 32)
    color_depth = _ev(page, "screen.colorDepth")
    assert color_depth in accepted_depths
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# PIN-locked surfaces (the values declared in PIN above)
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_pin_screen_width_lands_in_screen_object(page):
    """screen.width in the page must equal the pinned width."""
    reported = _ev(page, "screen.width")
    assert reported == PIN["screen.width"]
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_pin_screen_height_lands_in_screen_object(page):
    """screen.height in the page must equal the pinned height."""
    reported = _ev(page, "screen.height")
    assert reported == PIN["screen.height"]
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_pin_hardware_concurrency_lands_in_navigator(page):
    """navigator.hardwareConcurrency must equal the pinned core count."""
    reported = _ev(page, "navigator.hardwareConcurrency")
    assert reported == PIN["hardware.concurrency"]
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_pin_audio_sample_rate_lands_in_AudioContext(page):
    """A new AudioContext's sampleRate must equal the pinned sample rate.

    The context is closed after the read: the page is shared by the whole
    module, so a live AudioContext would otherwise stay open for every
    later test.
    """
    sample_rate = _ev(page, """() => {
        const Ctor = window.AudioContext || window.webkitAudioContext;
        const ctx = new Ctor();
        const rate = ctx.sampleRate;
        // Best-effort release; not awaited so the read cannot hang on it.
        ctx.close().catch(() => {});
        return rate;
    }""")
    assert sample_rate == PIN["audio.sample_rate"]
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_pin_audio_max_channels_lands_in_destination(page):
    """A new AudioContext's destination.maxChannelCount must equal the pin.

    The context is closed after the read so it does not stay open on the
    module-shared page.
    """
    channel_count = _ev(page, """() => {
        const Ctor = window.AudioContext || window.webkitAudioContext;
        const ctx = new Ctor();
        const count = ctx.destination.maxChannelCount;
        // Best-effort release; not awaited so the read cannot hang on it.
        ctx.close().catch(() => {});
        return count;
    }""")
    assert channel_count == PIN["audio.max_channel_count"]
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# fpscanner-style cross-checks
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fpscanner_ua_vs_platform_consistent(page):
    """UA OS substring must agree with navigator.platform OS substring.

    Windows UA -> "Win"; Mac UA -> "Mac"; Linux or X11 UA -> "Linux" or
    "X11" in platform. A UA with none of these tokens is skipped instead of
    passing with no assertion executed.
    """
    ua = _ev(page, "navigator.userAgent")
    platform = _ev(page, "navigator.platform")
    if "Windows" in ua:
        assert "Win" in platform, f"UA Win but platform={platform!r}"
    elif "Mac" in ua:
        assert "Mac" in platform, f"UA Mac but platform={platform!r}"
    elif "Linux" in ua or "X11" in ua:
        assert "Linux" in platform or "X11" in platform, (
            f"UA Linux/X11 but platform={platform!r}"
        )
    else:
        pytest.skip(f"no recognised OS token in UA: {ua!r}")
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fpscanner_no_userAgentData_on_firefox(page):
    """navigator.userAgentData is Chromium-only. Presence on Firefox UA = bot.

    Skipped for a non-Firefox UA so an inapplicable run is not reported as
    a pass.
    """
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    has_ua_data = _ev(page, "'userAgentData' in navigator")
    assert not has_ua_data, "navigator.userAgentData present under a Firefox UA"
|
||||
288
tests/test_geo.py
Normal file
288
tests/test_geo.py
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution).
|
||||
|
||||
Covers: the precedence policy (resolve_session_timezone), proxy→requests
|
||||
translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked
|
||||
mmdb). No real network or mmdb is touched.
|
||||
"""
|
||||
import sys
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import _geo
|
||||
from invisible_playwright._geo import (
|
||||
GeoTimezoneError,
|
||||
_proxies_for_requests,
|
||||
_proxy_is_set,
|
||||
discover_egress_ip,
|
||||
ip_to_timezone,
|
||||
resolve_session_timezone,
|
||||
)
|
||||
|
||||
# Shared proxy fixtures: a SOCKS5 and an HTTP gateway, both with credentials.
SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"}
HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"}
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# _proxy_is_set
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
@pytest.mark.parametrize(
    "proxy,expected",
    [
        (None, False),
        ({}, False),
        ({"server": ""}, False),
        ({"server": " "}, False),
        ({"server": "direct://"}, False),
        ({"server": "DIRECT://"}, False),
        ({"server": "socks5://h:1"}, True),
        ({"server": "http://h:8080"}, True),
    ],
)
def test_proxy_is_set(proxy, expected):
    """Missing, blank and direct:// servers are unset; real servers are set."""
    verdict = _proxy_is_set(proxy)
    assert verdict is expected
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# _proxies_for_requests — scheme + credential translation
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
def test_proxies_socks5_uses_socks5h_remote_dns():
    """A socks5:// server is rewritten to socks5h:// for both http and https."""
    mapping = _proxies_for_requests(SOCKS)
    assert mapping["http"] == "socks5h://u:p@gw.example:1080"
    assert mapping["https"] == mapping["http"]
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxies_socks4_scheme():
    """A socks4:// server keeps its scheme unchanged."""
    socks4_proxy = {"server": "socks4://gw:1080"}
    mapping = _proxies_for_requests(socks4_proxy)
    assert mapping["http"] == "socks4://gw:1080"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxies_http_and_https_schemes():
    """http:// and https:// servers pass through with their own scheme."""
    http_mapping = _proxies_for_requests(HTTP)
    assert http_mapping["http"] == "http://u:p@gw.example:8080"
    https_mapping = _proxies_for_requests({"server": "https://gw:8443"})
    assert https_mapping["https"] == "https://gw:8443"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxies_no_scheme_defaults_to_http():
    """A bare host:port server is given the http:// scheme."""
    bare_proxy = {"server": "gw.example:3128"}
    mapping = _proxies_for_requests(bare_proxy)
    assert mapping["http"] == "http://gw.example:3128"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxies_credentials_are_url_encoded():
    """'@', ':' and '/' in credentials must be percent-encoded so they
    cannot break parsing of the proxy URL."""
    awkward_proxy = {
        "server": "socks5://gw:1080",
        "username": "user@x",
        "password": "p:w/d",
    }
    mapping = _proxies_for_requests(awkward_proxy)
    encoded_authority = "user%40x:p%3Aw%2Fd@gw:1080"
    assert encoded_authority in mapping["http"]
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxies_no_credentials_has_no_auth_prefix():
    """Without username/password the URL carries no `user:pass@` part."""
    mapping = _proxies_for_requests({"server": "socks5://gw:1080"})
    http_url = mapping["http"]
    assert http_url == "socks5h://gw:1080"
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# discover_egress_ip — mocked requests
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
class _FakeResp:
|
||||
def __init__(self, text, status=200):
|
||||
self.text = text
|
||||
self._status = status
|
||||
|
||||
def raise_for_status(self):
|
||||
if self._status >= 400:
|
||||
raise RuntimeError(f"HTTP {self._status}")
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_discover_egress_ip_first_endpoint_wins(monkeypatch):
    """The first endpoint that answers with an IP ends the search."""
    requested_urls = []

    def _answering_get(url, **_kwargs):
        requested_urls.append(url)
        return _FakeResp("203.0.113.7\n")

    monkeypatch.setattr(_geo.requests, "get", _answering_get)

    found = discover_egress_ip(SOCKS)
    assert found == "203.0.113.7"
    # Exactly one request: no further endpoint is tried after a success.
    assert len(requested_urls) == 1
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch):
    """A body that is not an IP is skipped; the next endpoint's answer is used."""
    replies = iter([_FakeResp("junk-not-an-ip"), _FakeResp("198.51.100.42")])
    monkeypatch.setattr(_geo.requests, "get", lambda url, **kw: next(replies))

    found = discover_egress_ip(HTTP)
    assert found == "198.51.100.42"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_discover_egress_ip_all_fail_raises(monkeypatch):
    """When every request fails with OSError, GeoTimezoneError is raised."""

    def _refusing_get(url, **_kwargs):
        raise OSError("connection refused")

    monkeypatch.setattr(_geo.requests, "get", _refusing_get)

    with pytest.raises(GeoTimezoneError):
        discover_egress_ip(SOCKS)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_discover_egress_ip_no_proxy_is_direct(monkeypatch):
    """With proxy=None the request is direct: requests.get sees proxies=None."""
    captured = {}

    def _capturing_get(url, **kwargs):
        # "MISSING" distinguishes "kwarg not passed" from "passed as None".
        captured["proxies"] = kwargs.get("proxies", "MISSING")
        return _FakeResp("192.0.2.55")

    monkeypatch.setattr(_geo.requests, "get", _capturing_get)

    found = discover_egress_ip(None)
    assert found == "192.0.2.55"
    assert captured["proxies"] is None
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# ip_to_timezone — mocked mmdb reader
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
class _FakeReader:
|
||||
def __init__(self, record):
|
||||
self._record = record
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *a):
|
||||
return False
|
||||
|
||||
def get(self, ip):
|
||||
return self._record
|
||||
|
||||
|
||||
def _install_fake_maxminddb(monkeypatch, record):
|
||||
mod = types.ModuleType("maxminddb")
|
||||
mod.open_database = lambda path: _FakeReader(record)
|
||||
monkeypatch.setitem(sys.modules, "maxminddb", mod)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_ip_to_timezone_reads_location_time_zone(monkeypatch):
    """The zone is taken from record["location"]["time_zone"]."""
    record = {"location": {"time_zone": "Europe/Rome"}}
    _install_fake_maxminddb(monkeypatch, record)

    zone = ip_to_timezone("1.2.3.4", "x.mmdb")
    assert zone == "Europe/Rome"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_ip_to_timezone_ip_absent_raises(monkeypatch):
    """A lookup that yields no record (None) raises GeoTimezoneError."""
    no_record = None
    _install_fake_maxminddb(monkeypatch, no_record)

    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_ip_to_timezone_missing_zone_raises(monkeypatch):
    """A record whose "location" has no time_zone raises GeoTimezoneError."""
    zoneless_record = {"location": {}}
    _install_fake_maxminddb(monkeypatch, zoneless_record)

    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_ip_to_timezone_invalid_iana_raises(monkeypatch):
    """A time_zone string that is not a real zone raises GeoTimezoneError."""
    bogus_record = {"location": {"time_zone": "Not/AZone"}}
    _install_fake_maxminddb(monkeypatch, bogus_record)

    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# resolve_session_timezone — the precedence policy
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.fixture
def stub_egress(monkeypatch):
    """Replace egress discovery, the mmdb lookup and the mmdb fetch with stubs.

    Returns a dict that records whether discovery ran ("called") and which
    proxy it was handed ("proxy_arg").
    """
    recorded = {"called": False}

    def _fake_discover(proxy=None, **_ignored):
        recorded["called"] = True
        recorded["proxy_arg"] = proxy
        return "203.0.113.7"

    def _fake_ip_to_timezone(ip, mmdb):
        return "America/New_York"

    monkeypatch.setattr(_geo, "discover_egress_ip", _fake_discover)
    monkeypatch.setattr(_geo, "ip_to_timezone", _fake_ip_to_timezone)

    # ensure_geoip_mmdb is imported from .download at call time, so it has to
    # be patched on the download module itself.
    import invisible_playwright.download as dl

    monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb")
    return recorded
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_explicit_iana_wins(stub_egress):
    """An explicit zone is returned as-is and never triggers egress discovery."""
    explicit = "Asia/Tokyo"
    with_proxy = resolve_session_timezone(explicit, SOCKS)
    assert with_proxy == "Asia/Tokyo"
    without_proxy = resolve_session_timezone(explicit, None)
    assert without_proxy == "Asia/Tokyo"
    assert stub_egress["called"] is False
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_empty_with_proxy_resolves_from_proxy(stub_egress):
    """timezone="" plus a proxy → the zone is resolved through that proxy."""
    zone = resolve_session_timezone("", SOCKS)
    assert zone == "America/New_York"
    assert stub_egress["called"] is True
    # Discovery was handed the proxy, i.e. the lookup is routed through it.
    assert stub_egress["proxy_arg"] == SOCKS
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_auto_with_proxy_resolves_from_proxy(stub_egress):
    """timezone="auto" plus a proxy → the zone is resolved through that proxy."""
    zone = resolve_session_timezone("auto", HTTP)
    assert zone == "America/New_York"
    assert stub_egress["proxy_arg"] == HTTP
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_empty_no_proxy_resolves_from_host(stub_egress):
    """timezone="" without a proxy still resolves, from the host's public IP."""
    zone = resolve_session_timezone("", None)
    assert zone == "America/New_York"
    assert stub_egress["called"] is True
    # proxy=None → discovery makes a direct request.
    assert stub_egress["proxy_arg"] is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_auto_no_proxy_resolves_from_host(stub_egress):
    """timezone="auto" without a proxy resolves via a direct (proxy=None) lookup."""
    zone = resolve_session_timezone("auto", None)
    assert zone == "America/New_York"
    assert stub_egress["proxy_arg"] is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_direct_proxy_resolves_via_host(stub_egress):
    """A direct:// "proxy" is treated as no proxy: resolve from the host IP."""
    direct = {"server": "direct://"}
    zone = resolve_session_timezone("auto", direct)
    assert zone == "America/New_York"
    # Resolution is not skipped; discovery simply receives proxy=None.
    assert stub_egress["proxy_arg"] is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_no_proxy_failure_falls_back_to_host(monkeypatch):
    """Without a proxy a failed lookup must not break the launch: result is ""."""

    def _failing_discover(proxy=None, **_ignored):
        raise GeoTimezoneError("offline")

    monkeypatch.setattr(_geo, "discover_egress_ip", _failing_discover)

    # Both spellings of "resolve automatically" fall back to the host TZ ("").
    for requested in ("auto", ""):
        assert resolve_session_timezone(requested, None) == ""
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_proxy_failure_raises(monkeypatch):
    """With a proxy set, a failed lookup raises instead of silently using host TZ."""

    def _failing_discover(proxy=None, **_ignored):
        raise GeoTimezoneError("no egress")

    monkeypatch.setattr(_geo, "discover_egress_ip", _failing_discover)

    for requested in ("auto", ""):
        with pytest.raises(GeoTimezoneError):
            resolve_session_timezone(requested, SOCKS)
|
||||
131
tests/test_geoip_update.py
Normal file
131
tests/test_geoip_update.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""Unit tests for the intelligent geoip mmdb auto-update in `download.py`.
|
||||
|
||||
daijro/geoip-all-in-one rebuilds weekly; `ensure_geoip_mmdb` keeps the cache
|
||||
fresh without a download (or API call) on every launch. These tests mock the
|
||||
cache root, the latest-tag API, and the per-tag download so nothing touches the
|
||||
network.
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
import invisible_playwright.download as dl
|
||||
|
||||
|
||||
@pytest.fixture
def cache(tmp_path, monkeypatch):
    """Redirect `dl.cache_root()` to tmp_path and drop STEALTHFOX_GEOIP_MMDB."""
    monkeypatch.delenv("STEALTHFOX_GEOIP_MMDB", raising=False)

    def _tmp_cache_root():
        return tmp_path

    monkeypatch.setattr(dl, "cache_root", _tmp_cache_root)
    return tmp_path
|
||||
|
||||
|
||||
def _make_cached(root, tag, name=dl.GEOIP_MMDB_NAME):
    """Create `<root>/geoip/<tag>/<name>` holding placeholder bytes; return its path."""
    tag_dir = root / "geoip" / tag
    tag_dir.mkdir(parents=True, exist_ok=True)
    mmdb_path = tag_dir / name
    mmdb_path.write_bytes(b"FAKE-MMDB")
    return mmdb_path
|
||||
|
||||
|
||||
def _set_marker_age(root, days):
|
||||
m = root / "geoip" / ".last_check"
|
||||
m.parent.mkdir(parents=True, exist_ok=True)
|
||||
m.touch()
|
||||
old = time.time() - days * 86400
|
||||
os.utime(m, (old, old))
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# env override
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
def test_env_override_returns_file(tmp_path, monkeypatch):
    """STEALTHFOX_GEOIP_MMDB pointing at an existing file is returned as-is."""
    override = tmp_path / "mine.mmdb"
    override.write_bytes(b"X")
    monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(override))

    resolved = dl.ensure_geoip_mmdb()
    assert resolved == override
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_env_override_missing_raises(tmp_path, monkeypatch):
    """STEALTHFOX_GEOIP_MMDB pointing at a missing file raises RuntimeError."""
    missing = tmp_path / "nope.mmdb"
    monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(missing))

    with pytest.raises(RuntimeError):
        dl.ensure_geoip_mmdb()
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# freshness window
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
def test_fresh_cache_no_network(cache, monkeypatch):
    """Inside the freshness window the cached file is used without an API call."""
    cached = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 0)  # just checked

    def _forbidden_api_call():
        raise AssertionError("latest-tag API must NOT be called within the window")

    monkeypatch.setattr(dl, "_latest_geoip_tag", _forbidden_api_call)

    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved == cached
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_stale_same_tag_no_download(cache, monkeypatch):
    """A stale marker triggers a re-check, but an unchanged tag downloads nothing."""
    cached = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 30)  # stale → will re-check

    def _forbidden_download(*args, **kwargs):
        raise AssertionError("must not download when tag already cached")

    monkeypatch.setattr(dl, "_latest_geoip_tag", lambda: "2026.06.03")
    # The real _download_geoip_tag runs, but the target already exists, so the
    # low-level _download_file must never be reached.
    monkeypatch.setattr(dl, "_download_file", _forbidden_download)

    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved == cached
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_stale_new_tag_downloads_and_prunes(cache, monkeypatch):
    """A newer upstream tag is fetched and the previous tag directory is removed."""
    previous = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 30)
    monkeypatch.setattr(dl, "_latest_geoip_tag", lambda: "2026.06.10")

    def _simulated_fetch(tag):
        # Stands in for fetch+extract of the new tag.
        return _make_cached(cache, tag)

    monkeypatch.setattr(dl, "_download_geoip_tag", _simulated_fetch)

    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved.parent.name == "2026.06.10"
    assert not previous.parent.exists()  # old tag pruned
    assert resolved.exists()
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# offline resilience
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
def test_api_down_with_cache_uses_cache(cache, monkeypatch):
    """If the latest-tag API fails, the stale cached file is reused without raising."""
    cached = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 30)

    def _api_offline():
        raise OSError("offline")

    monkeypatch.setattr(dl, "_latest_geoip_tag", _api_offline)

    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved == cached
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_cold_cache_api_down_falls_back_to_pinned(cache, monkeypatch):
    """Empty cache + unreachable API → the pinned GEOIP_MMDB_VERSION is downloaded."""
    requested = {}

    def _api_offline():
        raise OSError("offline")

    def _recording_fetch(tag):
        requested["tag"] = tag
        return _make_cached(cache, tag)

    monkeypatch.setattr(dl, "_latest_geoip_tag", _api_offline)
    monkeypatch.setattr(dl, "_download_geoip_tag", _recording_fetch)

    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert requested["tag"] == dl.GEOIP_MMDB_VERSION
    assert resolved.exists()
|
||||
|
|
@ -55,3 +55,217 @@ def test_invisible_playwright_constructs_without_launching():
|
|||
assert obj is not None
|
||||
obj2 = InvisiblePlaywright(seed=42, headless=True)
|
||||
assert obj2 is not None
|
||||
|
||||
|
||||
# ─── profile_dir kwarg — persistent context support ─────────────────────── #
|
||||
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_profile_dir_none_by_default():
    """Persistence is opt-in: with no profile_dir, nothing persistent is set up.

    This prevents accidental state leaking between scripts that share a seed.
    """
    launcher = InvisiblePlaywright(seed=42)
    assert launcher._profile_dir is None
    assert launcher._persistent_context is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_profile_dir_string_is_coerced_to_path(tmp_path):
    """A str profile_dir is accepted and stored internally as a Path."""
    as_text = str(tmp_path)
    launcher = InvisiblePlaywright(seed=42, profile_dir=as_text)
    stored = launcher._profile_dir
    assert isinstance(stored, Path)
    assert stored == tmp_path
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_profile_dir_path_is_stored_as_is(tmp_path):
    """A Path profile_dir is kept equal to the value passed in."""
    launcher = InvisiblePlaywright(seed=42, profile_dir=tmp_path)
    stored = launcher._profile_dir
    assert stored == tmp_path
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_profile_dir_does_not_create_dir_until_enter(tmp_path):
    """Constructing the object must leave the filesystem untouched.

    The directory is only created on entering the context manager; otherwise
    a typo at instantiation would silently spawn directories.
    """
    not_yet_there = tmp_path / "nonexistent"
    assert not not_yet_there.exists()

    InvisiblePlaywright(seed=42, profile_dir=not_yet_there)

    assert not not_yet_there.exists()
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_persistent_context_kwargs_match_default_exactly():
    """Persistent kwargs must equal the non-persistent default kwargs.

    Since firefox-5 (C7 closure) the patched binary has
    docShell.overrideTimezone, so the per-realm overrides Playwright applies
    for `locale=`/`timezone_id=` succeed and no longer hang the persistent
    context launch handshake. Earlier builds needed those keys filtered out
    (180s timeout otherwise). Re-introducing that filter would silently lose
    timezone/locale isolation in persistent sessions; this test is the
    unit-level sentinel for that regression.
    """
    launcher = InvisiblePlaywright(
        seed=42,
        locale="en-GB",
        timezone="Europe/London",
        profile_dir="/tmp/x",
    )
    persistent_kw = launcher._persistent_context_kwargs()
    default_kw = launcher._default_context_kwargs()
    mismatch_msg = (
        "persistent_context kwargs must match default_context kwargs since "
        f"firefox-5.\n persistent: {persistent_kw!r}\n default: {default_kw!r}"
    )
    assert persistent_kw == default_kw, mismatch_msg
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_persistent_context_kwargs_INCLUDES_locale_and_timezone():
    """locale and timezone_id must reach launch_persistent_context.

    Sentinel for the C7 closure: firefox-5 ships the C++ overrideTimezone IDL
    method, so both values have to be passed through. If they are dropped,
    per-context isolation is silently lost and two sessions with different
    `timezone=` would share whatever TZ the env var set.

    Regression-defense: do NOT re-add the firefox-4-era filter.
    """
    launcher = InvisiblePlaywright(
        seed=42,
        locale="en-GB",
        timezone="Europe/London",
        profile_dir="/tmp/x",
    )
    kwargs = launcher._persistent_context_kwargs()

    got_locale = kwargs.get("locale")
    assert got_locale == "en-GB", (
        "locale must be in persistent kwargs (firefox-5+ supports it via "
        f"docShell.languageOverride). Got: {got_locale!r}"
    )

    got_tz = kwargs.get("timezone_id")
    assert got_tz == "Europe/London", (
        "timezone_id must be in persistent kwargs (firefox-5+ supports it "
        "via docShell.overrideTimezone IDL method, patch.md section 19). "
        f"Got: {got_tz!r}"
    )
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_persistent_context_kwargs_omits_timezone_when_empty_string():
    """timezone='' means "use host TZ", so no timezone_id key may be emitted.

    Passing it through would pin Playwright to the literal '' and break Intl.
    """
    launcher = InvisiblePlaywright(seed=42, timezone="", profile_dir="/tmp/x")
    kwargs = launcher._persistent_context_kwargs()
    assert "timezone_id" not in kwargs
|
||||
|
||||
|
||||
# ─── Mocked __enter__ flow — confirms the right Playwright call is made ── #
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_enter_with_profile_dir_calls_launch_persistent_context(tmp_path, monkeypatch):
    """With profile_dir set, __enter__ must use launch_persistent_context.

    It must call `firefox.launch_persistent_context(user_data_dir=...)` and NOT
    `firefox.launch(...)`. This is the structural check that the persistent
    branch is wired up; without it, profile_dir could be accepted but ignored.
    """
    from unittest.mock import MagicMock

    # Stub ensure_binary so the test never hits the network.
    monkeypatch.setattr(
        "invisible_playwright.launcher.ensure_binary",
        lambda: tmp_path / "firefox",
    )

    # sync_playwright().start() → fake playwright whose .firefox records calls.
    persistent_ctx = MagicMock(name="persistent_context")
    firefox = MagicMock()
    firefox.launch_persistent_context.return_value = persistent_ctx
    playwright = MagicMock()
    playwright.firefox = firefox
    manager = MagicMock()
    manager.start.return_value = playwright
    monkeypatch.setattr(
        "invisible_playwright.launcher.sync_playwright",
        lambda: manager,
    )

    profile = tmp_path / "myprofile"
    obj = InvisiblePlaywright(seed=42, profile_dir=profile)
    entered = obj.__enter__()

    # The persistent branch was taken, and only that one.
    firefox.launch_persistent_context.assert_called_once()
    firefox.launch.assert_not_called()

    # user_data_dir is the profile path, passed as a string.
    passed = firefox.launch_persistent_context.call_args.kwargs
    assert passed["user_data_dir"] == str(profile)

    # The directory now exists on disk (Playwright fails otherwise).
    assert profile.exists() and profile.is_dir()

    # __enter__ hands back the BrowserContext, not a Browser.
    assert entered is persistent_ctx
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_enter_without_profile_dir_calls_launch_not_persistent(tmp_path, monkeypatch):
    """Default path: profile_dir=None uses firefox.launch, never the persistent API.

    Sentinel that the non-persistent flow isn't accidentally rerouted.
    """
    from unittest.mock import MagicMock

    monkeypatch.setattr(
        "invisible_playwright.launcher.ensure_binary",
        lambda: tmp_path / "firefox",
    )

    browser = MagicMock(name="browser")
    browser.new_context = MagicMock()
    firefox = MagicMock()
    firefox.launch.return_value = browser
    playwright = MagicMock()
    playwright.firefox = firefox
    manager = MagicMock()
    manager.start.return_value = playwright
    monkeypatch.setattr(
        "invisible_playwright.launcher.sync_playwright",
        lambda: manager,
    )

    obj = InvisiblePlaywright(seed=42)
    entered = obj.__enter__()

    firefox.launch.assert_called_once()
    firefox.launch_persistent_context.assert_not_called()
    assert entered is browser
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_persistent_context_user_data_dir_is_created_if_missing(tmp_path, monkeypatch):
    """First run: a profile directory that does not exist yet is created.

    __enter__ must mkdir -p it (Playwright won't, and would crash with
    'user_data_dir does not exist').
    """
    from unittest.mock import MagicMock

    monkeypatch.setattr(
        "invisible_playwright.launcher.ensure_binary",
        lambda: tmp_path / "firefox",
    )

    manager = MagicMock()
    playwright = MagicMock()
    manager.start.return_value = playwright
    playwright.firefox.launch_persistent_context = MagicMock(return_value=MagicMock())
    monkeypatch.setattr(
        "invisible_playwright.launcher.sync_playwright",
        lambda: manager,
    )

    nested = tmp_path / "a" / "b" / "c" / "profile"
    assert not nested.parent.exists()  # none of the parents exist either

    obj = InvisiblePlaywright(seed=42, profile_dir=nested)
    obj.__enter__()

    assert nested.is_dir()
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_teardown_closes_persistent_context(tmp_path, monkeypatch):
    """__exit__ must close the persistent context.

    Otherwise Firefox + Playwright keep running until the parent process
    exits, which leaks handles in long-running tools (job orchestrators, MCP
    servers).
    """
    from unittest.mock import MagicMock

    monkeypatch.setattr(
        "invisible_playwright.launcher.ensure_binary",
        lambda: tmp_path / "firefox",
    )

    persistent_ctx = MagicMock(name="persistent_context")
    manager = MagicMock()
    started = manager.start.return_value
    started.firefox.launch_persistent_context.return_value = persistent_ctx
    monkeypatch.setattr(
        "invisible_playwright.launcher.sync_playwright",
        lambda: manager,
    )

    obj = InvisiblePlaywright(seed=42, profile_dir=tmp_path / "p")
    obj.__enter__()
    obj.__exit__(None, None, None)

    persistent_ctx.close.assert_called_once()
|
||||
|
|
|
|||
|
|
@ -16,24 +16,11 @@ and covers each patched call site:
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import urllib.parse
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def firefox_binary():
|
||||
if sys.platform not in BINARY_ENTRY_REL:
|
||||
pytest.skip(f"unsupported platform: {sys.platform}")
|
||||
from invisible_playwright.download import cache_dir_for_version
|
||||
entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
|
||||
if not entry.exists():
|
||||
pytest.skip("patched Firefox binary not cached; run `invisible-playwright fetch`")
|
||||
return str(entry)
|
||||
|
||||
|
||||
def _data_url(html: str) -> str:
|
||||
|
|
@ -195,7 +182,11 @@ def test_hover_triggers_mouseenter(firefox_binary):
|
|||
"onmouseenter=\"window.__h=true\">x</div>"
|
||||
))
|
||||
page.locator("#h").hover()
|
||||
assert page.evaluate("window.__h") is True
|
||||
# Wait for the event rather than reading immediately: under load / on a
|
||||
# virtual display the mouseenter can land a beat after hover() returns,
|
||||
# which made an instant read flaky. wait_for_function still fails (times
|
||||
# out) if mouseenter genuinely never fires.
|
||||
page.wait_for_function("() => window.__h === true", timeout=5000)
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
197
tests/test_proxy_socks_auth_e2e.py
Normal file
197
tests/test_proxy_socks_auth_e2e.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
"""E2E: the patched Firefox SENDS SOCKS5 username/password and routes through it.
|
||||
|
||||
Playwright's own ``proxy=`` ignores SOCKS auth; this is the patched
|
||||
``nsProtocolProxyService`` feature (reads ``network.proxy.socks_username`` /
|
||||
``socks_password``). ``test_proxy.py`` already unit-tests on CI that the wrapper
|
||||
sets those prefs; this proves the binary actually performs the RFC1929 auth
|
||||
handshake and relays traffic.
|
||||
|
||||
Fully hermetic — a local SOCKS5 server + a local HTTP target, with the localhost
|
||||
target forced through the proxy via ``allow_hijacking_localhost`` — so it runs
|
||||
identically on a dev box and on a GitHub runner (no external site, no secrets).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import http.server
|
||||
import socket
|
||||
import socketserver
|
||||
import struct
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
_USER = "ferd_socks_user"
|
||||
_PASS = "ferd_socks_pw_42"
|
||||
|
||||
|
||||
class _Socks5AuthRecorder:
|
||||
"""SOCKS5 that REQUIRES RFC1929 user/pass auth, records the creds it saw,
|
||||
then relays CONNECT to the requested target."""
|
||||
|
||||
def __init__(self):
|
||||
self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
self._srv.bind(("127.0.0.1", 0))
|
||||
self._srv.listen(16)
|
||||
self.port = self._srv.getsockname()[1]
|
||||
self.seen_creds: list[tuple[str, str]] = []
|
||||
self._stop = False
|
||||
threading.Thread(target=self._serve, daemon=True).start()
|
||||
|
||||
def _serve(self):
|
||||
while not self._stop:
|
||||
try:
|
||||
conn, _ = self._srv.accept()
|
||||
except OSError:
|
||||
break
|
||||
threading.Thread(target=self._handle, args=(conn,), daemon=True).start()
|
||||
|
||||
def _recv(self, s, n):
|
||||
buf = b""
|
||||
while len(buf) < n:
|
||||
chunk = s.recv(n - len(buf))
|
||||
if not chunk:
|
||||
return None
|
||||
buf += chunk
|
||||
return buf
|
||||
|
||||
def _handle(self, conn):
|
||||
try:
|
||||
head = self._recv(conn, 2)
|
||||
if not head or head[0] != 0x05:
|
||||
conn.close(); return
|
||||
methods = self._recv(conn, head[1]) or b""
|
||||
if 0x02 not in methods: # we REQUIRE user/pass
|
||||
conn.sendall(b"\x05\xff"); conn.close(); return
|
||||
conn.sendall(b"\x05\x02") # select user/pass auth
|
||||
if not self._recv(conn, 1): # RFC1929 version byte
|
||||
conn.close(); return
|
||||
ulen = self._recv(conn, 1)[0]
|
||||
uname = (self._recv(conn, ulen) or b"").decode("utf-8", "ignore")
|
||||
plen = self._recv(conn, 1)[0]
|
||||
passwd = (self._recv(conn, plen) or b"").decode("utf-8", "ignore")
|
||||
self.seen_creds.append((uname, passwd))
|
||||
conn.sendall(b"\x01\x00") # auth success
|
||||
req = self._recv(conn, 4)
|
||||
if not req:
|
||||
conn.close(); return
|
||||
_, cmd, _, atyp = req
|
||||
if atyp == 0x01:
|
||||
addr = socket.inet_ntoa(self._recv(conn, 4))
|
||||
elif atyp == 0x03:
|
||||
addr = (self._recv(conn, self._recv(conn, 1)[0]) or b"").decode()
|
||||
elif atyp == 0x04:
|
||||
addr = socket.inet_ntop(socket.AF_INET6, self._recv(conn, 16))
|
||||
else:
|
||||
conn.close(); return
|
||||
port = struct.unpack("!H", self._recv(conn, 2))[0]
|
||||
if cmd != 0x01: # only CONNECT
|
||||
conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00"); conn.close(); return
|
||||
try:
|
||||
up = socket.create_connection((addr, port), timeout=15)
|
||||
except OSError:
|
||||
conn.sendall(b"\x05\x05\x00\x01\x00\x00\x00\x00\x00\x00"); conn.close(); return
|
||||
conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00")
|
||||
self._pipe(conn, up)
|
||||
except Exception:
|
||||
try:
|
||||
conn.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def _pipe(a, b):
|
||||
def fwd(src, dst):
|
||||
try:
|
||||
while True:
|
||||
data = src.recv(65536)
|
||||
if not data:
|
||||
break
|
||||
dst.sendall(data)
|
||||
except OSError:
|
||||
pass
|
||||
finally:
|
||||
try:
|
||||
dst.shutdown(socket.SHUT_WR)
|
||||
except OSError:
|
||||
pass
|
||||
threading.Thread(target=fwd, args=(a, b), daemon=True).start()
|
||||
fwd(b, a)
|
||||
|
||||
def close(self):
|
||||
self._stop = True
|
||||
try:
|
||||
self._srv.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
class _LocalHTTP:
|
||||
"""A tiny localhost HTTP server — the CONNECT target relayed by the proxy."""
|
||||
|
||||
_HTML = b"<!doctype html><title>ok</title><h1 id=ok>socks-routed</h1>"
|
||||
|
||||
def __init__(self):
|
||||
html = self._HTML
|
||||
|
||||
class H(http.server.BaseHTTPRequestHandler):
|
||||
def do_GET(self): # noqa: N802
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/html; charset=utf-8")
|
||||
self.send_header("Content-Length", str(len(html)))
|
||||
self.end_headers()
|
||||
self.wfile.write(html)
|
||||
|
||||
def log_message(self, *a):
|
||||
pass
|
||||
|
||||
self._srv = socketserver.TCPServer(("127.0.0.1", 0), H)
|
||||
self.port = self._srv.server_address[1]
|
||||
threading.Thread(target=self._srv.serve_forever, daemon=True).start()
|
||||
|
||||
def close(self):
|
||||
self._srv.shutdown()
|
||||
|
||||
|
||||
@pytest.fixture
def socks_auth():
    """Yield a running `_Socks5AuthRecorder`; close it after the test."""
    recorder = _Socks5AuthRecorder()
    yield recorder
    recorder.close()
|
||||
|
||||
|
||||
@pytest.fixture
def local_http():
    """Yield a running `_LocalHTTP` target server; close it after the test."""
    target = _LocalHTTP()
    yield target
    target.close()
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_socks5_auth_creds_sent_and_routed(firefox_binary, socks_auth, local_http):
    """The binary must authenticate to the SOCKS5 proxy and load the page via it.

    It has to present the configured username/password in the SOCKS5 user/pass
    handshake, and the page must arrive through the proxy.
    """
    target_url = f"http://127.0.0.1:{local_http.port}/"
    proxy = {
        "server": f"socks5://127.0.0.1:{socks_auth.port}",
        "username": _USER,
        "password": _PASS,
    }
    # Firefox bypasses the proxy for localhost by default; force it through.
    prefs = {
        "network.proxy.allow_hijacking_localhost": True,
        "network.proxy.no_proxies_on": "",
    }

    launcher = InvisiblePlaywright(
        seed=42, binary_path=firefox_binary, proxy=proxy, extra_prefs=prefs
    )
    with launcher as browser:
        page = browser.new_page()
        page.goto(target_url, wait_until="load", timeout=30000)
        text = page.evaluate("() => document.getElementById('ok').textContent")

    assert text == "socks-routed", "page did not load through the SOCKS proxy"
    creds_seen = socks_auth.seen_creds
    assert (_USER, _PASS) in creds_seen, (
        "patched Firefox did not send the SOCKS5 auth creds from prefs; "
        f"proxy saw: {creds_seen!r}"
    )
|
||||
349
tests/test_recaptcha_seed.py
Normal file
349
tests/test_recaptcha_seed.py
Normal file
|
|
@ -0,0 +1,349 @@
|
|||
"""Unit tests for the deterministic reCAPTCHA cookie builder.
|
||||
|
||||
Validates the contract:
|
||||
- 5 .google.com cookies always present (1P_JAR was removed as deprecated)
|
||||
- Per-site cookies built from a `browsing_history` list (sampled by the
|
||||
Bayesian network in _fpforge)
|
||||
- Determinism: same (seed, history) → identical content
|
||||
- Chrome 400-day cookie cap respected
|
||||
- Playwright add_cookies field requirements satisfied
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from invisible_playwright._recaptcha_seed import (
|
||||
build_cookies,
|
||||
_sub_seed,
|
||||
)
|
||||
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
_FIXED_NOW = 1779600000  # 2026-05-24 05:20:00 UTC, frozen for determinism
|
||||
|
||||
|
||||
# Sample browsing history for tests (mimics what _fpforge produces).
|
||||
_SAMPLE_HISTORY = [
|
||||
{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"},
|
||||
{"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||
{"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"},
|
||||
{"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"},
|
||||
]
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 1. Set composition
|
||||
# ===========================================================================
|
||||
|
||||
def test_only_google_cookies_when_no_history():
    """With ``browsing_history=None`` the jar holds only the five .google.com
    cookies (1P_JAR was dropped in realism round 2 — deprecated by Google in 2022)."""
    jar = build_cookies(seed=42, browsing_history=None, now=_FIXED_NOW)

    expected_names = sorted(["NID", "CONSENT", "SOCS", "_GRECAPTCHA", "ENID"])
    actual_names = sorted(cookie["name"] for cookie in jar)
    assert actual_names == expected_names

    for cookie in jar:
        assert cookie["domain"] == ".google.com"
|
||||
|
||||
|
||||
def test_browsing_history_adds_host_cookies():
    """Every site in the history shows up with at least one cookie on its domain."""
    jar = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)

    google_count = sum(1 for cookie in jar if cookie["domain"] == ".google.com")
    assert google_count == 5  # 1P_JAR removed

    host_domains = {cookie["domain"] for cookie in jar} - {".google.com"}
    for entry in _SAMPLE_HISTORY:
        assert "." + entry["name"] in host_domains
|
||||
|
||||
|
||||
def test_domain_dot_prefix_normalized():
    """Every cookie domain carries a leading dot so sub-domains are covered."""
    jar = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
    for cookie in jar:
        domain = cookie["domain"]
        assert domain.startswith("."), f"missing dot: {domain}"
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 2. Cookie profile recipes (each profile yields the expected cookie set)
|
||||
# ===========================================================================
|
||||
|
||||
def test_profile_minimal_yields_ga_only():
    """The ``minimal`` profile puts a single ``_ga`` cookie on the site's domain."""
    jar = build_cookies(
        seed=42,
        browsing_history=[{"name": "x.com", "cookie_profile": "minimal"}],
        now=_FIXED_NOW,
    )
    host_names = [cookie["name"] for cookie in jar if cookie["domain"] == ".x.com"]
    assert host_names == ["_ga"]
|
||||
|
||||
|
||||
def test_profile_ga_only_yields_ga_and_gid():
    """The ``ga_only`` profile yields exactly ``_ga`` and ``_gid`` on the site's domain."""
    jar = build_cookies(
        seed=42,
        browsing_history=[{"name": "x.com", "cookie_profile": "ga_only"}],
        now=_FIXED_NOW,
    )
    host_names = [cookie["name"] for cookie in jar if cookie["domain"] == ".x.com"]
    host_names.sort()
    assert host_names == ["_ga", "_gid"]
|
||||
|
||||
|
||||
def test_profile_ga_cf_yields_ga_and_cf_bm():
    """The ``ga_cf`` profile yields exactly ``_ga`` and ``__cf_bm`` on the site's domain."""
    jar = build_cookies(
        seed=42,
        browsing_history=[{"name": "x.com", "cookie_profile": "ga_cf"}],
        now=_FIXED_NOW,
    )
    host_names = [cookie["name"] for cookie in jar if cookie["domain"] == ".x.com"]
    host_names.sort()
    assert host_names == ["__cf_bm", "_ga"]
|
||||
|
||||
|
||||
def test_profile_ga_consent_yields_three_cookies():
    """The ``ga_consent`` profile yields three cookies: ``_ga``, ``_gid`` and a
    consent-banner cookie."""
    jar = build_cookies(
        seed=42,
        browsing_history=[{"name": "x.com", "cookie_profile": "ga_consent"}],
        now=_FIXED_NOW,
    )
    host_names = [cookie["name"] for cookie in jar if cookie["domain"] == ".x.com"]
    name_set = set(host_names)

    # _ga and _gid are always present, plus a OneTrust or CookieYes banner cookie.
    assert {"_ga", "_gid"} <= name_set
    assert name_set & {"OptanonAlertBoxClosed", "cookieyes-consent"}
    assert len(host_names) == 3
|
||||
|
||||
|
||||
def test_profile_ga_consent_clarity_yields_at_least_four_cookies():
    """``ga_consent_clarity`` always carries ``_ga``, ``_gid``, ``_clck`` and a
    consent-banner cookie. ``_fbp``, ``_dc_gtm_*`` and ``__hssrc`` are optional
    extras picked by the rng (see test_new_helper_cookies_*)."""
    jar = build_cookies(
        seed=42,
        browsing_history=[{"name": "x.com", "cookie_profile": "ga_consent_clarity"}],
        now=_FIXED_NOW,
    )
    host_names = [cookie["name"] for cookie in jar if cookie["domain"] == ".x.com"]
    name_set = set(host_names)

    assert {"_ga", "_gid", "_clck"} <= name_set
    assert name_set & {"OptanonAlertBoxClosed", "cookieyes-consent"}
    assert len(host_names) >= 4  # 4 baseline + 0-3 helpers
|
||||
|
||||
|
||||
def test_unknown_profile_falls_back_to_ga():
    """An unrecognised ``cookie_profile`` value yields just a ``_ga`` cookie."""
    jar = build_cookies(
        seed=42,
        browsing_history=[{"name": "x.com", "cookie_profile": "nonexistent_profile"}],
        now=_FIXED_NOW,
    )
    host_names = [cookie["name"] for cookie in jar if cookie["domain"] == ".x.com"]
    assert host_names == ["_ga"]
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 3. Determinism
|
||||
# ===========================================================================
|
||||
|
||||
def test_same_seed_and_history_same_content():
    """Two builds with identical seed, history and clock produce equal output."""
    first, second = (
        build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
        for _ in range(2)
    )
    assert first == second
|
||||
|
||||
|
||||
def test_different_seed_different_content():
    """Changing the seed changes the NID cookie value."""
    nid_values = []
    for seed in (42, 99):
        jar = build_cookies(seed=seed, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
        nid_cookie = next(cookie for cookie in jar if cookie["name"] == "NID")
        nid_values.append(nid_cookie["value"])
    assert nid_values[0] != nid_values[1]
|
||||
|
||||
|
||||
def test_history_order_does_not_affect_domain_specific_cookies():
    """Per-site cookie values depend on the domain name, not on where the
    site sits in the history list."""

    def host_cookie_values(history):
        # Map (domain, name) -> value for every non-Google cookie.
        jar = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
        return {
            (cookie["domain"], cookie["name"]): cookie["value"]
            for cookie in jar
            if cookie["domain"] != ".google.com"
        }

    first_site, second_site = _SAMPLE_HISTORY[0], _SAMPLE_HISTORY[1]
    forward = host_cookie_values([first_site, second_site])
    reversed_order = host_cookie_values([second_site, first_site])
    assert forward == reversed_order
|
||||
|
||||
|
||||
def test_sub_seed_distinct_tags_distinct_streams():
    """Different tags under one seed give different sub-seeds, and seed 0
    still maps to a non-zero sub-seed."""
    google_seed = _sub_seed(42, "google")
    github_seed = _sub_seed(42, "dom:github.com")
    amazon_seed = _sub_seed(42, "dom:amazon.com")

    assert google_seed != github_seed
    assert github_seed != amazon_seed
    assert _sub_seed(0, "any") != 0  # seed=0 still produces non-zero sub-seed
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 4. Format / structural correctness for the Google batch
|
||||
# ===========================================================================
|
||||
|
||||
def test_nid_format():
    """NID value is ``<3-digit prefix>=<178-char payload>``."""
    jar = build_cookies(seed=42, now=_FIXED_NOW)
    nid_cookie = next(cookie for cookie in jar if cookie["name"] == "NID")
    prefix, payload = nid_cookie["value"].split("=", 1)

    assert prefix.isdigit()
    assert len(prefix) == 3
    # Range was widened to 100-540 in realism round 2 to cover historical NID versions.
    assert 100 <= int(prefix) <= 540
    assert len(payload) == 178
|
||||
|
||||
|
||||
def test_consent_format():
    """CONSENT value starts with ``YES+cb.`` and contains the ``+FX+`` token."""
    jar = build_cookies(seed=42, now=_FIXED_NOW)
    consent_value = next(cookie for cookie in jar if cookie["name"] == "CONSENT")["value"]
    assert consent_value.startswith("YES+cb.")
    assert "+FX+" in consent_value
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 5. Chrome 400-day cookie cap compliance
|
||||
# ===========================================================================
|
||||
|
||||
def test_all_expiries_within_400_day_cap():
    """Chrome 104+ caps cookie lifetime at 400 days; anything longer is
    silently truncated or dropped. Every cookie except the short-lived
    telemetry ones must therefore expire within 400 days of ``now``."""
    jar = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)

    # Short-lived telemetry cookies are exempt from the check.
    exempt_names = ("__cf_bm", "1P_JAR", "_gid")
    latest_allowed = _FIXED_NOW + 400 * 86400

    long_lived = [cookie for cookie in jar if cookie["name"] not in exempt_names]
    for cookie in long_lived:
        assert cookie["expires"] <= latest_allowed, (
            f"Cookie {cookie['name']} expires {cookie['expires'] - _FIXED_NOW}s "
            f"(> 400d cap) — would be silently dropped"
        )
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 6. Playwright add_cookies field requirements
|
||||
# ===========================================================================
|
||||
|
||||
def test_all_cookies_have_required_playwright_fields():
    """Each cookie has a non-empty name and domain, a value that is not None,
    and ``path == "/"``."""
    jar = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
    for cookie in jar:
        name = cookie.get("name")
        value = cookie.get("value")
        domain = cookie.get("domain")
        path = cookie.get("path")

        assert name, f"missing name: {cookie}"
        assert value is not None, f"missing value: {cookie}"
        assert domain, f"missing domain: {cookie}"
        assert path == "/", f"path != / for {cookie['name']}"
|
||||
|
||||
|
||||
def test_modern_cookies_marked_secure():
    """Any cookie with ``sameSite == "None"`` must also have ``secure is True``
    (Firefox/Chrome reject the combination otherwise; it is also generally
    needed for cookies injected via Playwright add_cookies without a
    navigation context)."""
    jar = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
    cross_site = [cookie for cookie in jar if cookie.get("sameSite") == "None"]
    for cookie in cross_site:
        assert cookie.get("secure") is True, f"{cookie['name']} None+!secure invalid"
|
||||
|
||||
|
||||
def test_httponly_on_signed_cookies():
    """Both NID and ENID are flagged ``httpOnly``."""
    jar = build_cookies(seed=42, now=_FIXED_NOW)
    signed = [
        next(cookie for cookie in jar if cookie["name"] == wanted)
        for wanted in ("NID", "ENID")
    ]
    for cookie in signed:
        assert cookie.get("httpOnly") is True
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 7. End-to-end with real fpforge Profile
|
||||
# ===========================================================================
|
||||
|
||||
def test_with_real_fpforge_profile():
    """End-to-end: generate a real Profile, check that ``browsing_history`` is
    populated with well-formed entries, and run ``build_cookies`` against it."""
    from invisible_playwright._fpforge import generate_profile

    prof = generate_profile(seed=42)
    assert isinstance(prof.browsing_history, list)
    # The Bayesian network samples ~15-30 sites per persona
    assert 5 <= len(prof.browsing_history) <= 50, \
        f"unexpected history length: {len(prof.browsing_history)}"
    # Each entry has the expected fields
    for site in prof.browsing_history:
        assert "name" in site and "category" in site and "cookie_profile" in site
    # build_cookies works against the real profile
    cookies = build_cookies(seed=prof.seed, browsing_history=prof.browsing_history,
                            now=_FIXED_NOW)
    # 5 google cookies (1P_JAR was removed) + at least 1 cookie per visited
    # site. The bound must not assume a sixth Google cookie: a history made
    # only of single-cookie ("minimal") sites yields exactly 5 + N cookies.
    assert len(cookies) >= 5 + len(prof.browsing_history)
|
||||
|
||||
|
||||
def test_same_seed_same_browsing_history_via_fpforge():
    """Generating a profile twice with the same seed gives equal ``browsing_history``."""
    from invisible_playwright._fpforge import generate_profile

    first_history = generate_profile(seed=42).browsing_history
    second_history = generate_profile(seed=42).browsing_history
    assert first_history == second_history
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 8. Realism improvements (2026-05-24 round 2)
|
||||
# ===========================================================================
|
||||
|
||||
def test_no_1p_jar_cookie():
    """1P_JAR must never be emitted: Google deprecated it in 2022, so its
    presence is an anachronism that cookie-freshness fingerprinters can flag."""
    jar = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
    assert all(cookie["name"] != "1P_JAR" for cookie in jar)
|
||||
|
||||
|
||||
def test_nid_prefix_broadened_range():
    """Across seeds 0-199 the NID numeric prefix must dip below 500 at least
    once (historical versions such as 137/105/511/525 were seen in real
    captures) and never exceed 540."""

    def nid_prefix(seed):
        # Numeric part in front of the first "=" of the NID value.
        jar = build_cookies(seed=seed, now=_FIXED_NOW)
        nid_cookie = next(cookie for cookie in jar if cookie["name"] == "NID")
        return int(nid_cookie["value"].split("=", 1)[0])

    seen_prefixes = {nid_prefix(seed) for seed in range(200)}

    assert min(seen_prefixes) < 500, f"NID range never goes below 500 ({sorted(seen_prefixes)[:5]})"
    assert max(seen_prefixes) <= 540
|
||||
|
||||
|
||||
def test_consent_lang_from_timezone_eu():
    """With ``timezone="Europe/Rome"`` the CONSENT value carries ``.it+IT+``."""
    jar = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Rome")
    consent_value = next(cookie for cookie in jar if cookie["name"] == "CONSENT")["value"]
    assert ".it+IT+" in consent_value, f"expected it+IT in: {consent_value}"
|
||||
|
||||
|
||||
def test_consent_lang_default_fx():
    """A US timezone falls back to the non-EU default token ``.en+FX+``."""
    jar = build_cookies(seed=42, now=_FIXED_NOW, timezone="America/New_York")
    consent_value = next(cookie for cookie in jar if cookie["name"] == "CONSENT")["value"]
    assert ".en+FX+" in consent_value
|
||||
|
||||
|
||||
def test_consent_lang_de_for_berlin():
    """With ``timezone="Europe/Berlin"`` the CONSENT value carries ``.de+DE+``."""
    jar = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Berlin")
    consent_value = next(cookie for cookie in jar if cookie["name"] == "CONSENT")["value"]
    assert ".de+DE+" in consent_value
|
||||
|
||||
|
||||
def test_consent_lang_no_timezone_default():
    """Omitting the timezone argument yields the default ``.en+FX+`` token."""
    jar = build_cookies(seed=42, now=_FIXED_NOW)
    consent_value = next(cookie for cookie in jar if cookie["name"] == "CONSENT")["value"]
    assert ".en+FX+" in consent_value
|
||||
|
||||
|
||||
def test_new_helper_cookies_appear_in_ga_consent_clarity():
    """The optional helpers ``_fbp``, ``_dc_gtm_*`` and ``__hssrc`` are chosen by
    the rng, so each must turn up for at least one of seeds 0-99."""
    history = [{"name": "site.com", "cookie_profile": "ga_consent_clarity"}]

    # Union of every cookie name seen on .site.com across all seeds.
    observed = set()
    for seed in range(100):
        jar = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW)
        observed.update(cookie["name"] for cookie in jar if cookie["domain"] == ".site.com")

    saw_fbp = "_fbp" in observed
    saw_gtm = any(name.startswith("_dc_gtm_") for name in observed)
    saw_hssrc = "__hssrc" in observed

    assert saw_fbp, "_fbp never appeared in 100 seeds (rng pick broken)"
    assert saw_gtm, "_dc_gtm_* never appeared in 100 seeds"
    assert saw_hssrc, "__hssrc never appeared in 100 seeds"
|
||||
|
||||
|
||||
def test_fbp_format():
    """``_fbp`` has the shape ``fb.<idx>.<unix_ms>.<random_int>``."""
    history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}]

    # _fbp is optional, so walk seeds until one of them emits it.
    for seed in range(20):
        jar = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW)
        fbp_cookie = next((cookie for cookie in jar if cookie["name"] == "_fbp"), None)
        if not fbp_cookie:
            continue
        fields = fbp_cookie["value"].split(".")
        assert fields[0] == "fb"
        assert fields[1].isdigit()
        assert fields[2].isdigit() and len(fields[2]) >= 13  # unix ms
        assert fields[3].isdigit()
        break
    else:
        raise AssertionError("never got _fbp across 20 seeds — distribution broken")
|
||||
253
tests/test_release_e2e.py
Normal file
253
tests/test_release_e2e.py
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
"""End-to-end release tests.
|
||||
|
||||
These exercise the FULL user install path against the LIVE GitHub release.
|
||||
They are slow (download a ~110 MB binary, launch Firefox) and require network
|
||||
access — marked `e2e` so they're excluded from the default suite. Run them
|
||||
BEFORE announcing a release:
|
||||
|
||||
pytest tests/test_release_e2e.py -m e2e -v
|
||||
|
||||
Or to target a specific git revision (default is current HEAD on origin/main):
|
||||
|
||||
INVPW_E2E_REV=v0.1.5 pytest tests/test_release_e2e.py -m e2e -v
|
||||
|
||||
What each test verifies and why it exists:
|
||||
|
||||
test_clean_install_from_git_main:
|
||||
Spawns a fresh venv and pip-installs the wrapper from git HEAD. Confirms
|
||||
the package has no broken metadata, missing deps, or import errors in a
|
||||
pristine environment. Catches the "works on my machine because I already
|
||||
have the dev deps" class of bug.
|
||||
|
||||
test_fetch_against_live_release:
|
||||
After the install, runs `python -m invisible_playwright fetch --force`,
|
||||
which downloads the live tarball + checksums.txt for the pinned
|
||||
BINARY_VERSION from the production GitHub release. This is THE test that
|
||||
would have caught LostBoxArt's #15 — the checksums.txt parser bug only
|
||||
manifested against the real binary-mode format the release ships, not
|
||||
against unit-test mocks.
|
||||
|
||||
test_version_command_reports_wrapper_and_binary:
|
||||
Confirms `python -m invisible_playwright --version` resolves the binary
|
||||
and reports the expected `firefox-N` tag. Sanity check that the binary
|
||||
landed in the cache and the wrapper can find it.
|
||||
|
||||
test_playwright_launch_against_real_site (linux-only by default):
|
||||
Launches the patched Firefox under the wrapper, navigates to a stable
|
||||
public URL, and reads a known DOM property. This is the full stack:
|
||||
wrapper init → Firefox launch → Juggler handshake → page.goto →
|
||||
page.evaluate. If anything along the way regresses (Juggler protocol
|
||||
schema drift, prefs typo, sandbox issue, …) this fails loudly.
|
||||
|
||||
The tests use a temp cache dir per run (env var
|
||||
`INVISIBLE_PLAYWRIGHT_CACHE_DIR`) so they never poison the developer's real
|
||||
cache and never get false positives from a previously-cached binary.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Repository the clean-install test pip-installs from.
REPO_URL = "https://github.com/feder-cr/invisible_playwright.git"
# Git revision to install; overridable through the INVPW_E2E_REV env var.
REV = os.getenv("INVPW_E2E_REV", "main")
|
||||
|
||||
|
||||
# ---------- helpers --------------------------------------------------------- #
|
||||
|
||||
|
||||
def _run(cmd: list[str], *, env: dict | None = None, cwd: Path | None = None,
|
||||
timeout: int = 300, check: bool = True) -> subprocess.CompletedProcess:
|
||||
"""Run a subprocess with full output captured. Fail with both streams shown."""
|
||||
result = subprocess.run(
|
||||
cmd, env=env, cwd=cwd, timeout=timeout,
|
||||
capture_output=True, text=True,
|
||||
)
|
||||
if check and result.returncode != 0:
|
||||
raise AssertionError(
|
||||
f"{' '.join(cmd)} exited {result.returncode}\n"
|
||||
f"--- stdout ---\n{result.stdout[-3000:]}\n"
|
||||
f"--- stderr ---\n{result.stderr[-3000:]}"
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _venv_python(venv: Path) -> Path:
|
||||
if os.name == "nt":
|
||||
return venv / "Scripts" / "python.exe"
|
||||
return venv / "bin" / "python"
|
||||
|
||||
|
||||
# ---------- fixtures -------------------------------------------------------- #
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def workspace() -> Path:
    """Module-wide temp directory, so the venv and the 110 MB tarball are
    created/downloaded once rather than per test. Removed on teardown."""
    tmp_root = tempfile.mkdtemp(prefix="invpw-e2e-")
    yield Path(tmp_root)
    shutil.rmtree(tmp_root, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def clean_venv(workspace: Path) -> Path:
    """Create a new venv under the workspace, upgrade pip in it, and return
    the path of its python executable."""
    venv_root = workspace / "venv"
    create_cmd = [sys.executable, "-m", "venv", str(venv_root)]
    _run(create_cmd, timeout=180)

    interpreter = _venv_python(venv_root)
    assert interpreter.exists(), f"venv python not found at {interpreter}"

    upgrade_cmd = [str(interpreter), "-m", "pip", "install", "--upgrade", "pip", "--quiet"]
    _run(upgrade_cmd, timeout=180)
    return interpreter
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def isolated_cache_env(workspace: Path) -> dict:
    """Copy of the current environment with the wrapper's cache location
    redirected to a private directory inside the workspace, so the
    developer's real cache is neither read nor polluted."""
    cache_dir = workspace / "cache"
    cache_dir.mkdir(exist_ok=True)
    cache_str = str(cache_dir)
    return {
        **os.environ,
        "INVISIBLE_PLAYWRIGHT_CACHE_DIR": cache_str,
        "XDG_CACHE_HOME": cache_str,
    }
|
||||
|
||||
|
||||
# ---------- tests ----------------------------------------------------------- #
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_clean_install_from_git_main(clean_venv: Path):
    """pip-install the package from git+HTTPS into the pristine venv, then
    import it there."""
    interpreter = str(clean_venv)
    _run([interpreter, "-m", "pip", "install", f"git+{REPO_URL}@{REV}"], timeout=600)

    # Import probe — surfaces missing __init__ exports, syntax errors and
    # missing runtime dependencies.
    probe = "import invisible_playwright as ip; print('OK', ip.__name__)"
    result = _run([interpreter, "-c", probe], timeout=30)
    assert "OK invisible_playwright" in result.stdout
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_version_command_reports_wrapper_and_binary(clean_venv: Path):
    """``python -m invisible_playwright --version`` must exit cleanly and
    mention a ``firefox-`` binary tag on stdout or stderr."""
    version_cmd = [str(clean_venv), "-m", "invisible_playwright", "--version"]
    result = _run(version_cmd, timeout=30)
    text = result.stdout + result.stderr
    assert "firefox-" in text, f"BINARY_VERSION not reported: {text!r}"
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fetch_against_live_release(clean_venv: Path, isolated_cache_env: dict):
    """Run ``fetch --force`` against the LIVE GitHub release (tarball +
    checksums.txt). Regression sentinel for #15.

    When checksums.txt uses the ``*``-prefixed (binary) format and the parser
    leaves the ``*`` in the key, fetch fails with
        RuntimeError: no SHA256 for {asset} in checksums.txt
    """
    fetch_cmd = [str(clean_venv), "-m", "invisible_playwright", "fetch", "--force"]
    # Generous timeout: 110 MB download + extract on slow connections.
    result = _run(fetch_cmd, env=isolated_cache_env, timeout=900)
    output = result.stdout + result.stderr

    # Anti-regression for #15: this literal text is what the broken parser
    # produced, kept verbatim so a failure can be grepped back to the issue.
    parser_regressed = "no SHA256 for" in output
    assert not parser_regressed, (
        "Issue #15 regression: parser couldn't find SHA for the asset.\n"
        f"Output:\n{output[-2000:]}"
    )

    checksum_mismatch = "SHA256 mismatch" in output
    assert not checksum_mismatch, (
        "Tarball SHA doesn't match the published checksums.txt — "
        "either the upload was corrupted or the release was re-packed "
        "without updating checksums.txt."
    )
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_binary_executes_after_fetch(clean_venv: Path, isolated_cache_env: dict):
    """``ensure_binary()`` must resolve to an existing file that answers
    ``--version`` as Firefox 150."""
    probe = (
        "from invisible_playwright.download import ensure_binary; "
        "p = ensure_binary(); print('BINARY', p)"
    )
    result = _run([str(clean_venv), "-c", probe], env=isolated_cache_env, timeout=60)

    reported = next(
        (line for line in result.stdout.splitlines() if line.startswith("BINARY ")),
        None,
    )
    assert reported is not None, f"ensure_binary() didn't print path: {result.stdout!r}"
    binary_path = Path(reported.split(" ", 1)[1])
    assert binary_path.exists(), f"binary missing: {binary_path}"

    # Output is read from stdout and stderr combined because the stream
    # used by `firefox --version` differs across platforms.
    extensionless_on_windows = os.name == "nt" and binary_path.suffix == ""
    if extensionless_on_windows:
        # A Linux binary on a Windows host cannot be launched directly;
        # ensure_binary() above already showed the cache was populated.
        pytest.skip("Cross-platform binary launch from Windows requires WSL.")

    r = subprocess.run(
        [str(binary_path), "--version"],
        capture_output=True, text=True, timeout=30,
    )
    combined = (r.stdout + r.stderr).lower()
    assert "firefox" in combined and "150." in combined, (
        f"binary --version didn't report Firefox 150: rc={r.returncode} "
        f"out={r.stdout!r} err={r.stderr!r}"
    )
|
||||
|
||||
|
||||
@pytest.mark.e2e
@pytest.mark.linux_only
def test_playwright_launch_against_real_site(clean_venv: Path,
                                             isolated_cache_env: dict):
    """Full stack: start the patched Firefox through the wrapper inside the
    clean venv, open https://example.com, and read the title and user agent.
    Guards against Juggler protocol drift, profile-generation bugs, locale
    handling regressions and prefs typos."""
    if sys.platform.startswith("win"):
        pytest.skip("Headless launch path requires display server (skip on Win).")

    # Program executed by the venv interpreter; results come back on stdout.
    script_lines = [
        "from invisible_playwright import InvisiblePlaywright",
        "with InvisiblePlaywright(headless=True, seed=42) as browser:",
        " ctx = browser.new_context()",
        " page = ctx.new_page()",
        " page.goto('https://example.com', timeout=30000)",
        " title = page.title()",
        " ua = page.evaluate('navigator.userAgent')",
        " print('TITLE=' + title)",
        " print('UA=' + ua)",
    ]
    script = "\n".join(script_lines) + "\n"

    result = _run([str(clean_venv), "-c", script],
                  env=isolated_cache_env, timeout=180)
    stdout = result.stdout

    assert "TITLE=Example Domain" in stdout, (
        f"page.title() didn't return expected text:\n{stdout[-1000:]}"
    )
    assert "UA=" in stdout and "Firefox/150" in stdout, (
        "navigator.userAgent doesn't report Firefox/150 — UA spoofing "
        f"regression?\n{stdout[-1000:]}"
    )
|
||||
|
||||
|
||||
# ---------- meta: verify the test markers themselves work ------------------- #
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_e2e_marker_is_excluded_by_default():
    """Canary for pyproject.toml's `addopts = '-m not e2e'`: this test should
    only ever execute when `-m e2e` is passed explicitly. Seeing it in a
    plain pytest run means the addopts filter is broken."""
    # Nothing to verify at runtime — the signal is whether this test is selected.
    assert True
|
||||
226
tests/test_service_worker.py
Normal file
226
tests/test_service_worker.py
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
"""Service worker interception regression tests — issue #18 root cause.
|
||||
|
||||
The bug: `juggler/content/NetworkObserver.js:channelIntercepted` called
|
||||
`interceptedChannel.interceptAfterServiceWorkerResets()` — an IDL method
|
||||
that upstream Playwright adds via a C++ patch (InterceptedHttpChannel.cpp
|
||||
+ nsINetworkInterceptController.idl). Our fork was missing those patches
|
||||
until firefox-6, so the call threw TypeError → C++ NetworkObserver was
|
||||
left in an inconsistent state → content process disposal manifested as
|
||||
"page crash" on sites whose service workers fall through to the network
|
||||
(e.g., id.sky.com).
|
||||
|
||||
These tests serve the page and its service worker from a local HTTP fixture
server (data: and about:blank are opaque origins where no SW can register),
so no external network is required. They assert the page
stays alive across SW registration + fetch lifecycle.
|
||||
|
||||
Run:
|
||||
pytest tests/test_service_worker.py -m e2e -v
|
||||
|
||||
For dev iteration:
|
||||
INVPW_BINARY_PATH=/path/to/firefox.exe pytest tests/test_service_worker.py -m e2e -v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import http.server
|
||||
import socketserver
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import InvisiblePlaywright
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Local HTTP fixture server — service workers need a real http(s) origin
|
||||
# (data: and about:blank are opaque-origin, no SW registration possible).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _SWFixtureHandler(http.server.BaseHTTPRequestHandler):
|
||||
"""Serves a tiny set of routes for SW lifecycle testing."""
|
||||
|
||||
PAGES = {
|
||||
"/": (200, "text/html", b"""<!doctype html>
|
||||
<html><head><title>sw-host</title></head>
|
||||
<body>
|
||||
<script>
|
||||
window.__swState = 'loading';
|
||||
if ('serviceWorker' in navigator) {
|
||||
navigator.serviceWorker.register('/sw.js')
|
||||
.then(reg => { window.__swState = 'registered'; })
|
||||
.catch(err => { window.__swState = 'failed:' + err.message; });
|
||||
} else {
|
||||
window.__swState = 'unsupported';
|
||||
}
|
||||
</script>
|
||||
</body></html>
|
||||
"""),
|
||||
"/sw.js": (200, "application/javascript", b"""
|
||||
self.addEventListener('install', e => self.skipWaiting());
|
||||
self.addEventListener('activate', e => e.waitUntil(clients.claim()));
|
||||
self.addEventListener('fetch', e => {
|
||||
if (e.request.url.endsWith('/from-sw')) {
|
||||
e.respondWith(new Response('hello from SW', {
|
||||
headers: {'content-type': 'text/plain'},
|
||||
}));
|
||||
}
|
||||
// Fall through for everything else - exercises the
|
||||
// interceptAfterServiceWorkerResets path that was broken pre-firefox-6.
|
||||
});
|
||||
"""),
|
||||
"/from-sw": (200, "text/plain", b"network-fallback"),
|
||||
"/from-network": (200, "text/plain", b"net-only"),
|
||||
}
|
||||
|
||||
def do_GET(self):
|
||||
path = self.path.split("?", 1)[0]
|
||||
if path in self.PAGES:
|
||||
status, ctype, body = self.PAGES[path]
|
||||
self.send_response(status)
|
||||
self.send_header("Content-Type", ctype)
|
||||
self.send_header("Content-Length", str(len(body)))
|
||||
# SW requires HTTPS or localhost — we're on localhost so plain http is fine
|
||||
self.send_header("Service-Worker-Allowed", "/")
|
||||
self.end_headers()
|
||||
self.wfile.write(body)
|
||||
else:
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
|
||||
def log_message(self, *args, **kwargs):
|
||||
pass # silence stdout
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def fixture_server():
    """Run ``_SWFixtureHandler`` on an OS-assigned 127.0.0.1 port in a daemon
    thread and yield the base URL (e.g. 'http://127.0.0.1:54321'). The
    server is shut down and closed on teardown."""
    server = socketserver.TCPServer(("127.0.0.1", 0), _SWFixtureHandler)
    bound_port = server.server_address[1]
    base_url = f"http://127.0.0.1:{bound_port}"

    worker = threading.Thread(target=server.serve_forever, daemon=True)
    worker.start()
    try:
        yield base_url
    finally:
        server.shutdown()
        server.server_close()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def page(firefox_binary):
    """Yield one page from a headless, seed-42 browser shared by the whole
    module; the browser context manager exits on teardown."""
    launch_kwargs = dict(seed=42, binary_path=firefox_binary, headless=True)
    with InvisiblePlaywright(**launch_kwargs) as browser:
        context = browser.new_context()
        shared_page = context.new_page()
        yield shared_page
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regression tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_service_worker_registration_does_not_crash_page(page, fixture_server):
    """Navigate to a page that registers a SW. The page must survive the
    registration. Pre-firefox-6 this crashed if the SW path hit the missing
    `interceptAfterServiceWorkerResets()` IDL method."""
    crashed = {"v": False}
    page.on("crash", lambda p: crashed.__setitem__("v", True))

    page.goto(f"{fixture_server}/", timeout=15_000)
    # Wait for SW to register (or fail cleanly)
    page.wait_for_function(
        "window.__swState !== 'loading'", timeout=10_000
    )
    state = page.evaluate("window.__swState")
    assert not crashed["v"], f"page crashed during SW registration (state={state!r})"
    # state should be 'registered' or 'failed:...' (Firefox supports SW).
    # The wait above is also satisfied when __swState was never assigned
    # (undefined !== 'loading'); evaluate() then yields None. Check the type
    # first so that case fails with the message below instead of an
    # AttributeError from None.startswith.
    assert isinstance(state, str) and (
        state == "registered" or state.startswith("failed:")
    ), f"unexpected SW state: {state!r}"
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_page_with_sw_can_navigate_repeatedly(page, fixture_server):
    """With a SW in place, reload the host page several times.

    Each reload goes through the interception path again; pre-firefox-6 this
    hit the C++ crash after a few cycles.
    """
    crash_events = []
    page.on("crash", lambda p: crash_events.append(True))

    page.goto(f"{fixture_server}/", timeout=15_000)
    page.wait_for_function("window.__swState !== 'loading'", timeout=10_000)

    # 5 reloads — the SW fetch handler runs each time
    remaining = 5
    while remaining > 0:
        page.reload(timeout=15_000)
        assert not crash_events
        title = page.evaluate("document.title")
        assert title == "sw-host"
        remaining -= 1
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_fetch_through_sw_returns_sw_synthesized_response(page, fixture_server):
    """Fetch `/from-sw` from a page whose SW is registered.

    The SW intercepts that path and synthesizes the response — this is the
    exact flow that crashed in id.sky.com.
    """
    fetch_from_sw = """async (base) => {
const r = await fetch(base + '/from-sw');
return await r.text();
}"""
    # Either the SW served 'hello from SW' (intercepted) or the network
    # served 'network-fallback' (if SW didn't claim yet). Both are OK —
    # the regression we test is that it doesn't CRASH.
    acceptable = ("hello from SW", "network-fallback")

    page.goto(f"{fixture_server}/", timeout=15_000)
    page.wait_for_function("window.__swState === 'registered'", timeout=10_000)

    # First request to /from-sw routes through the SW
    body = page.evaluate(fetch_from_sw, fixture_server)
    assert body in acceptable, (
        f"unexpected /from-sw response body: {body!r}"
    )
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_sw_fall_through_to_network_does_not_crash(page, fixture_server):
    """Fetch a URL the SW declines to handle, so the request goes to the network.

    This is the `interceptAfterServiceWorkerResets()` code path: the SW
    decides not to handle, the channel goes back to network. Without the
    C++ patch, this is where the C++ side ended up in an inconsistent state.
    """
    crash_events = []
    page.on("crash", lambda p: crash_events.append(True))
    fetch_from_network = """async (base) => {
const r = await fetch(base + '/from-network');
return await r.text();
}"""

    page.goto(f"{fixture_server}/", timeout=15_000)
    page.wait_for_function("window.__swState === 'registered'", timeout=10_000)

    # /from-network is NOT intercepted by SW — exercises the fall-through
    text = page.evaluate(fetch_from_network, fixture_server)
    assert text == "net-only"
    assert not crash_events
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_sw_unregister_then_register_again(page, fixture_server):
    """Drop every SW registration and register `/sw.js` again.

    Exercises lifecycle bookkeeping in the C++ InterceptedHttpChannel state
    machine; the page must not crash and the new registration must report a
    scope containing '/'.
    """
    crash_events = []
    page.on("crash", lambda p: crash_events.append(True))
    reregister = """async () => {
const regs = await navigator.serviceWorker.getRegistrations();
for (const r of regs) await r.unregister();
const r2 = await navigator.serviceWorker.register('/sw.js');
return r2.scope;
}"""

    page.goto(f"{fixture_server}/", timeout=15_000)
    page.wait_for_function("window.__swState === 'registered'", timeout=10_000)

    # Unregister all SWs then register again
    scope = page.evaluate(reregister)
    assert "/" in scope
    assert not crash_events
|
||||
103
tests/test_version.py
Normal file
103
tests/test_version.py
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
"""Regression tests for issue #24: CLI version reporting.
|
||||
|
||||
Two distinct symptoms reported by `i43-j`:
|
||||
1. `python -m invisible_playwright --version` errored out (only the
|
||||
`version` subcommand worked).
|
||||
2. `python -m invisible_playwright version` printed the literal string
|
||||
"0.1.0" regardless of the installed version (a stale hardcoded
|
||||
`__version__` in __init__.py that nobody had remembered to bump).
|
||||
|
||||
These tests pin down both behaviours so the regressions don't sneak back
|
||||
in via a future copy/paste.
|
||||
"""
|
||||
import io
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
|
||||
import pytest
|
||||
|
||||
import invisible_playwright
|
||||
from invisible_playwright import __version__, cli
|
||||
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_version_matches_installed_package_metadata():
    """``__version__`` must equal the installed distribution's metadata version,
    so it can never drift from the pyproject.toml `version` field."""
    import importlib.metadata

    installed = importlib.metadata.version("invisible-playwright")
    assert __version__ == installed
|
||||
|
||||
|
||||
def test_version_is_not_the_stale_010_string():
    """Issue #24 regression guard: ``__version__`` was once a hardcoded,
    never-bumped '0.1.0'. Seeing that literal again means stale metadata
    has been shipped."""
    failure_note = (
        "__version__ is the stale hardcoded '0.1.0' string — issue #24 has "
        "regressed. Use importlib.metadata to derive it from pyproject.toml."
    )
    assert __version__ != "0.1.0", failure_note
|
||||
|
||||
|
||||
def test_version_subcommand_prints_real_version():
    """`invisible-playwright version` must print the actual installed version,
    not the old hardcoded '0.1.0'."""
    buf = io.StringIO()
    with redirect_stdout(buf):
        rc = cli.main(["version"])
    assert rc == 0
    out = buf.getvalue()
    assert f"invisible_playwright {__version__}" in out
    # Look for the stale literal only as the package's own version token.
    # The same output also carries the BINARY_VERSION / Firefox strings, and a
    # plain substring test for "0.1.0" would trip on e.g. "140.1.0" there.
    stale = re.search(r"invisible_playwright\s+0\.1\.0(?![\w.])", out)
    assert stale is None or __version__ == "0.1.0"  # only allowed if truly 0.1.0
    assert "BINARY_VERSION=" in out
    assert "Firefox " in out
|
||||
|
||||
|
||||
def test_dash_dash_version_flag_works():
    """Issue #24: `python -m invisible_playwright --version` used to fail with
    'the following arguments are required: cmd' because only the `version`
    subcommand existed. The top-level flag must now exit 0 and print the
    package name and version."""
    # argparse's --version action calls sys.exit(0) directly, so use subprocess.
    command = [sys.executable, "-m", "invisible_playwright", "--version"]
    proc = subprocess.run(command, capture_output=True, text=True, timeout=15)
    assert proc.returncode == 0, (
        f"--version returned {proc.returncode}, stderr={proc.stderr!r}"
    )
    # argparse may emit on stdout or stderr depending on version
    emitted = proc.stdout + proc.stderr
    for expected in ("invisible_playwright", __version__):
        assert expected in emitted
|
||||
|
||||
|
||||
def test_no_args_prints_help_not_traceback():
    """Running the module with no arguments must show usage text and must
    not dump a traceback."""
    command = [sys.executable, "-m", "invisible_playwright"]
    proc = subprocess.run(command, capture_output=True, text=True, timeout=15)
    emitted = proc.stdout + proc.stderr
    # Either prints help (rc=2) or shows usage. Must NOT contain a traceback.
    assert "Traceback" not in emitted
    assert "usage:" in emitted.lower()
|
||||
|
||||
|
||||
def test_dash_V_short_flag_works():
    """`-V` is accepted as the short form of `--version` (Python convention)."""
    command = [sys.executable, "-m", "invisible_playwright", "-V"]
    proc = subprocess.run(command, capture_output=True, text=True, timeout=15)
    assert proc.returncode == 0
    emitted = proc.stdout + proc.stderr
    assert __version__ in emitted
|
||||
|
||||
|
||||
def test_version_matches_semver_shape():
    """Sanity: the version must start with digits.digits.digits (a suffix such
    as a PEP-440 dev marker may follow), not a placeholder string."""
    leading_triplet = re.match(r"^\d+\.\d+\.\d+", __version__)
    assert leading_triplet, (
        f"__version__ {__version__!r} doesn't look like a real version"
    )
|
||||
453
tests/test_webrtc_realness.py
Normal file
453
tests/test_webrtc_realness.py
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
"""WebRTC realness regression tests.
|
||||
|
||||
Two layers, both runnable on GitHub CI:
|
||||
|
||||
* **unit** (`@pytest.mark.unit`) — pure SDP/candidate assertions against golden
|
||||
samples. No browser, no proxy, no network. These lock in every rule we found
|
||||
on 2026-06-06: host must be mDNS ``.local``; the synthetic srflx must carry the
|
||||
egress IP with a GENUINE nICEr priority (never ``local_pref == 0xFFFF``) and a
|
||||
stable, distinct foundation; CreepJS's resolver must return the egress, and a
|
||||
host-only SDP must read as "blocked". They run in the standard ``tests.yml``.
|
||||
|
||||
* **e2e** (`@pytest.mark.e2e`) — launch the patched binary and verify the live
|
||||
ICE gather. "Being behind a proxy" is faked WITHOUT smartproxy:
|
||||
- the egress IP is injected via ``STEALTHFOX_WEBRTC_PUBLIC_IP`` (RFC 5737
|
||||
TEST-NET, so it never collides with a real IP);
|
||||
- the "behind a TCP-only SOCKS proxy" condition is reproduced by a tiny
|
||||
in-process SOCKS5 server that relays TCP CONNECT but refuses UDP ASSOCIATE
|
||||
(exactly a residential TCP-only proxy → WebRTC's default-route UDP probe
|
||||
fails → exercises the Fix C fallback). No credentials, no external proxy.
|
||||
Excluded from the default run; a binary is located via ``STEALTHFOX_E2E_BINARY``
|
||||
(or the locally-built tree), else the test skips.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import select
|
||||
import socket
|
||||
import struct
|
||||
import threading
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
|
||||
import pytest
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# Pure SDP / ICE-candidate helpers (no I/O) — the heart of the sentinels.
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# One ICE candidate attribute: foundation, component, transport, priority,
# address, port and type, plus the optional raddr/rport pair.
_CAND = re.compile(
    r"candidate:(?P<foundation>\S+)\s+(?P<component>\d+)\s+(?P<proto>UDP|TCP|udp|tcp)\s+"
    r"(?P<priority>\d+)\s+(?P<address>\S+)\s+(?P<port>\d+)\s+typ\s+(?P<typ>\w+)"
    r"(?:.*?raddr\s+(?P<raddr>\S+)\s+rport\s+(?P<rport>\d+))?"
)


def parse_candidate(line):
    """Turn one ``a=candidate:`` / ``candidate:`` line into a field dict.

    Numeric fields (component, priority, port and, when present, rport) are
    converted to ``int`` and the transport is upper-cased. Returns ``None``
    when the line does not contain a candidate.
    """
    match = _CAND.search(line)
    if match is None:
        return None
    fields = match.groupdict()
    for numeric in ("component", "priority", "port"):
        fields[numeric] = int(fields[numeric])
    fields["proto"] = fields["proto"].upper()
    rport = fields["rport"]
    if rport is not None:
        fields["rport"] = int(rport)
    return fields
|
||||
|
||||
|
||||
def decode_priority(prio):
    """Break a candidate priority integer into the bit fields nICEr packs
    into it (type<<24 | iface<<16 | dir<<13 | stun<<8 | (256-component)).

    ``local_pref`` is the whole 16-bit field at bits 8-23; ``iface_pref``,
    ``direction`` and ``stun_priority`` are sub-fields of that same range.
    """
    def field(shift, mask):
        return (prio >> shift) & mask

    return {
        "type_pref": field(24, 0xFF),
        "iface_pref": field(16, 0xFF),
        "local_pref": field(8, 0xFFFF),
        "direction": field(13, 0x7),
        "stun_priority": field(8, 0x1F),
        "component": 256 - field(0, 0xFF),
    }
|
||||
|
||||
|
||||
def is_mdns(addr):
    """True when ``addr`` is non-empty and its string form ends in ``.local``."""
    if not addr:
        return False
    return str(addr).endswith(".local")
|
||||
|
||||
|
||||
def candidates(sdp_or_lines):
    """Parse every candidate found in an SDP string or an iterable of lines.

    A string is scanned for ``candidate:`` lines (with or without ``a=``);
    anything else is treated as a sequence of lines. Lines that do not parse
    are dropped.
    """
    if isinstance(sdp_or_lines, str):
        raw_lines = re.findall(r"(?:a=)?candidate:[^\r\n]*", sdp_or_lines)
    else:
        raw_lines = list(sdp_or_lines)

    parsed = []
    for raw in raw_lines:
        cand = parse_candidate(raw)
        if cand:
            parsed.append(cand)
    return parsed
|
||||
|
||||
|
||||
def host_candidates(cands):
    """Keep only the candidates whose ``typ`` is ``host``."""
    return list(filter(lambda cand: cand["typ"] == "host", cands))
|
||||
|
||||
|
||||
def srflx_candidates(cands):
    """Keep only the candidates whose ``typ`` is ``srflx``."""
    return list(filter(lambda cand: cand["typ"] == "srflx", cands))
|
||||
|
||||
|
||||
def host_is_mdns(cands):
    """True only if there is at least one host candidate and every host
    address is a ``.local`` mDNS name rather than a raw LAN IP (the §9.4 leak
    form that fails BrowserLeaks)."""
    hosts = host_candidates(cands)
    if not hosts:
        return False
    for host in hosts:
        if not is_mdns(host["address"]):
            return False
    return True
|
||||
|
||||
|
||||
def srflx_realness(cand, expected_ip=None):
    """Judge whether ``cand`` looks like a GENUINE nICEr server-reflexive
    candidate, per the 2026-06-06 findings.

    Returns ``(ok, reasons)``: ``reasons`` lists every failed check and
    ``ok`` is True only when it is empty. A non-srflx candidate is rejected
    immediately with a single reason. ``expected_ip``, when given, must equal
    the candidate address.
    """
    if cand["typ"] != "srflx":
        return False, ["not a srflx candidate"]

    problems = []
    if expected_ip is not None and cand["address"] != expected_ip:
        problems.append(f"address {cand['address']} != expected {expected_ip}")

    decoded = decode_priority(cand["priority"])
    type_pref = decoded["type_pref"]
    local_pref = decoded["local_pref"]
    stun_priority = decoded["stun_priority"]

    if type_pref != 100:
        problems.append(f"type_pref {type_pref} != 100 (SRV_RFLX)")
    if local_pref == 0xFFFF:
        problems.append("local_pref == 0xFFFF — impossible nICEr value (the old hardcoded tell)")
    elif local_pref < 0x7000 or local_pref >= 0x8000:
        problems.append(f"local_pref {local_pref} outside the genuine ~0x7E00-0x7FFF band")
    if stun_priority < 16 or stun_priority > 31:
        problems.append(f"stun_priority {stun_priority} implausible (expect 31-server_id)")

    raddr = cand.get("raddr")
    if raddr not in (None, "0.0.0.0"):
        problems.append(f"raddr {raddr} not redacted to 0.0.0.0")
    return len(problems) == 0, problems
|
||||
|
||||
|
||||
def creep_get_ipaddress(sdp):
    """Port of CreepJS's getIPAddress(sdp).

    The connection line's address wins unless it is missing or '0.0.0.0';
    otherwise the first candidate address matched by the candidate regex is
    used. Returns ``None`` when the result would be empty or '0.0.0.0' — the
    case in which CreepJS renders 'stun connection: blocked'.
    """
    blocked = "0.0.0.0"

    conn_lines = re.findall(r"c=IN\s.+\s", sdp)
    first_conn = conn_lines[0] if conn_lines else ""
    parts = first_conn.strip().split(" ")
    conn_ip = parts[2] if len(parts) > 2 else ""
    if conn_ip not in ("", blocked):
        return conn_ip

    hit = re.search(r"(udp|tcp)\s(?:\d|\w)+\s((?:\d|\w|\.|:)+)(?=\s)", sdp, re.I)
    if hit is None:
        return None
    candidate_ip = hit.group(2)
    if not candidate_ip or candidate_ip == blocked:
        return None
    return candidate_ip
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# Golden samples — real priority/foundation values, TEST-NET IPs (RFC 5737)
|
||||
# so no real address is ever committed (feedback_pre_push_privacy_check).
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# Host candidate in the required form: the address is a `<uuid>.local` mDNS name.
HOST_MDNS = "candidate:0 1 UDP 2122252543 1460e928-16b3-4c66-80ad-04abcdef0000.local 54551 typ host"
# Same host candidate carrying a raw private LAN address instead.
HOST_RAW_IP = "candidate:0 1 UDP 2122252543 192.168.1.20 54551 typ host"  # §9.4 leak form
# srflx samples with raddr/rport zeroed; both priorities have type_pref 100
# and a local_pref in the 0x7Exx/0x7Fxx range (0x7E1F and 0x7F1F).
VANILLA_SRFLX = "candidate:1 1 UDP 1685987327 203.0.113.50 3755 typ srflx raddr 0.0.0.0 rport 0"
OURS_SRFLX = "candidate:1 1 UDP 1686052863 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0"
# Pre-fix injection: local_pref hardcoded to 0xFFFF (priority 1694498815). The tell.
OLD_BAD_SRFLX = "candidate:2 1 UDP 1694498815 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0"

# Minimal SDP with a 0.0.0.0 connection line, the mDNS host and our srflx.
SDP_GOOD = (
    "v=0\r\nc=IN IP4 0.0.0.0\r\n"
    f"a={HOST_MDNS}\r\na={OURS_SRFLX}\r\n"
)
# Same SDP without the srflx line.
SDP_BLOCKED = "v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n"  # host-only, no srflx
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# UNIT sentinels (run on GitHub CI)
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
def test_parse_and_decode_basics():
    """Parse the golden srflx line and decode its priority; every field must
    come back with the expected value."""
    parsed = parse_candidate(OURS_SRFLX)
    assert parsed["typ"] == "srflx"
    assert parsed["proto"] == "UDP"
    assert parsed["address"] == "203.0.113.7"
    assert parsed["raddr"] == "0.0.0.0"
    assert parsed["rport"] == 0

    decoded = decode_priority(parsed["priority"])
    assert decoded["type_pref"] == 100
    assert decoded["stun_priority"] == 31
    assert decoded["component"] == 1
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_genuine_srflx_passes():
    """Both golden srflx samples must pass the realness check against their
    own address."""
    for sample in (VANILLA_SRFLX, OURS_SRFLX):
        parsed = parse_candidate(sample)
        verdict, why = srflx_realness(parsed, expected_ip=parsed["address"])
        assert verdict, why
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_old_0xffff_srflx_is_rejected():
    """Fix A sentinel: a srflx whose local_pref is 0xFFFF must be rejected,
    and the reasons must name 0xFFFF."""
    verdict, why = srflx_realness(parse_candidate(OLD_BAD_SRFLX))
    assert not verdict
    mentions_tell = [reason for reason in why if "0xFFFF" in reason]
    assert mentions_tell, why
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_host_must_be_mdns_not_raw_ip():
    """§9.4 sentinel: a `.local` host passes; a raw-IP host is a leak and fails."""
    mdns_only = candidates([HOST_MDNS])
    raw_ip_only = candidates([HOST_RAW_IP])
    assert host_is_mdns(mdns_only) is True
    assert host_is_mdns(raw_ip_only) is False
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_srflx_foundation_distinct_from_host():
    """Fix B sentinel: no srflx foundation may coincide with a host foundation."""
    parsed = candidates([HOST_MDNS, OURS_SRFLX])
    host_foundations = set()
    for cand in host_candidates(parsed):
        host_foundations.add(cand["foundation"])
    srflx_foundations = set()
    for cand in srflx_candidates(parsed):
        srflx_foundations.add(cand["foundation"])
    assert srflx_foundations
    assert not (srflx_foundations & host_foundations)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_creep_resolver_returns_egress_when_srflx_present():
    """With host + srflx in the SDP, the resolver must return the srflx address."""
    resolved = creep_get_ipaddress(SDP_GOOD)
    assert resolved == "203.0.113.7"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_creep_resolver_reports_blocked_for_host_only():
    """The exact false-green we shipped: a host-only (.local) SDP has no
    public IP, so CreepJS shows 'blocked' and the resolver must give None."""
    resolved = creep_get_ipaddress(SDP_BLOCKED)
    assert resolved is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_mdns_host_is_invisible_to_creep_resolver():
    """A .local host must NOT be mis-read as an IP (the hyphen in the UUID is
    what makes CreepJS skip it and fall through to the srflx)."""
    host_only_sdp = "v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n"
    assert creep_get_ipaddress(host_only_sdp) is None
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# Fake-proxy infrastructure for e2e: a tiny TCP-only SOCKS5 server.
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
class _Socks5TcpOnly:
    """Minimal SOCKS5: no-auth, CONNECT (TCP) relayed, UDP ASSOCIATE refused.

    Reproduces a residential TCP-only proxy: pages load over TCP, but WebRTC's
    UDP path is dead — which (for a no-camera page in default_address_only mode)
    is exactly what made the default-route probe fail and ICE return zero
    candidates before Fix C.

    The server binds an ephemeral port on 127.0.0.1 (see ``port``) and starts
    serving from a daemon thread as soon as it is constructed.
    ``udp_associate_attempts`` counts refused UDP ASSOCIATE requests.
    """

    # SOCKS5 command codes used below.
    _CMD_CONNECT = 0x01
    _CMD_UDP_ASSOCIATE = 0x03

    def __init__(self):
        self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self._srv.bind(("127.0.0.1", 0))
        self._srv.listen(16)
        self.port = self._srv.getsockname()[1]
        self.udp_associate_attempts = 0
        self._stop = False
        self._t = threading.Thread(target=self._serve, daemon=True)
        self._t.start()

    def _serve(self):
        """Accept loop: hand each client to its own daemon thread until the
        listener is closed or ``_stop`` is set."""
        while not self._stop:
            try:
                conn, _ = self._srv.accept()
            except OSError:
                break
            threading.Thread(target=self._handle, args=(conn,), daemon=True).start()

    def _recv_exact(self, sock, n):
        """Read exactly ``n`` bytes from ``sock``; ``None`` if the peer closes first."""
        buf = b""
        while len(buf) < n:
            chunk = sock.recv(n - len(buf))
            if not chunk:
                return None
            buf += chunk
        return buf

    def _read_target(self, conn, atyp):
        """Read the request's address and port for address type ``atyp``.

        Returns ``(addr, port)``, or ``None`` on an unknown address type or a
        short read.
        """
        if atyp == 0x01:  # IPv4
            raw = self._recv_exact(conn, 4)
            addr = None if raw is None else socket.inet_ntoa(raw)
        elif atyp == 0x03:  # length-prefixed domain name
            length = self._recv_exact(conn, 1)
            raw = None if length is None else self._recv_exact(conn, length[0])
            addr = None if raw is None else raw.decode("ascii", "ignore")
        elif atyp == 0x04:  # IPv6
            raw = self._recv_exact(conn, 16)
            addr = None if raw is None else socket.inet_ntop(socket.AF_INET6, raw)
        else:
            return None
        if addr is None:
            return None
        raw_port = self._recv_exact(conn, 2)
        if raw_port is None:
            return None
        return addr, struct.unpack("!H", raw_port)[0]

    def _handle(self, conn):
        """Serve one client: greeting, request, then relay or refuse.

        Both the client socket and any upstream socket are closed on every
        exit path, including a failure while sending the success reply.
        """
        upstream = None
        try:
            head = self._recv_exact(conn, 2)
            if not head or head[0] != 0x05:
                return
            if self._recv_exact(conn, head[1]) is None:  # offered auth methods
                return
            conn.sendall(b"\x05\x00")  # no-auth
            req = self._recv_exact(conn, 4)
            if not req:
                return
            _ver, cmd, _rsv, atyp = req
            target = self._read_target(conn, atyp)
            if target is None:
                return
            if cmd != self._CMD_CONNECT:  # not CONNECT (e.g. UDP ASSOCIATE) → refuse
                if cmd == self._CMD_UDP_ASSOCIATE:
                    self.udp_associate_attempts += 1
                conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00")  # cmd not supported
                return
            try:
                upstream = socket.create_connection(target, timeout=15)
            except OSError:
                conn.sendall(b"\x05\x04\x00\x01\x00\x00\x00\x00\x00\x00")  # host unreachable
                return
            conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00")  # success
            self._relay(conn, upstream)
        except Exception:
            # Deliberately best-effort: a broken client connection must never
            # take the test proxy down.
            pass
        finally:
            for sock in (conn, upstream):
                if sock is None:
                    continue
                try:
                    sock.close()
                except Exception:
                    pass

    def _relay(self, a, b):
        """Copy bytes both ways between ``a`` and ``b`` until either side
        closes or 30 seconds pass with no traffic; always closes both."""
        try:
            while True:
                r, _, _ = select.select([a, b], [], [], 30)
                if not r:
                    break
                for s in r:
                    data = s.recv(65536)
                    if not data:
                        return
                    (b if s is a else a).sendall(data)
        finally:
            for s in (a, b):
                try:
                    s.close()
                except Exception:
                    pass

    def close(self):
        """Stop accepting: set the stop flag and close the listening socket."""
        self._stop = True
        try:
            self._srv.close()
        except Exception:
            pass
|
||||
|
||||
|
||||
# Same per-event probe CreepJS runs (kept tiny; raw string = one escape level).
# Evaluated in the page: opens an RTCPeerConnection with two Google STUN
# servers, records every non-empty `icecandidate` string, waits 3.5 s after
# setLocalDescription, then returns {candidates, sdp} where sdp is the local
# description's SDP (or '' if there is none).
_PROBE_JS = r"""async () => {
const pc = new RTCPeerConnection({iceCandidatePoolSize:1, iceServers:[{urls:[
'stun:stun4.l.google.com:19302','stun:stun3.l.google.com:19302']}]});
pc.createDataChannel('');
const cands = [];
pc.addEventListener('icecandidate', e => { if (e.candidate && e.candidate.candidate) cands.push(e.candidate.candidate); });
await pc.setLocalDescription(await pc.createOffer({offerToReceiveAudio:1, offerToReceiveVideo:1}));
await new Promise(r => setTimeout(r, 3500));
const sdp = (pc.localDescription && pc.localDescription.sdp) || '';
try { pc.close(); } catch(e) {}
return { candidates: cands, sdp };
}"""

# Egress address the e2e tests inject and then expect in the srflx candidate.
_FAKE_EGRESS = "203.0.113.7"  # RFC 5737 TEST-NET-3
|
||||
|
||||
|
||||
def _e2e_binary():
    """Locate the patched browser binary for the e2e tests.

    Checks, in order, the path in ``STEALTHFOX_E2E_BINARY``, the path in
    ``INVPW_BINARY_PATH`` (what conftest's firefox_binary uses), and the
    locally-built tree; returns the first that exists on disk, else ``None``.

    Each env var is checked on its own, so a stale ``STEALTHFOX_E2E_BINARY``
    pointing at a missing file no longer hides a valid ``INVPW_BINARY_PATH``.
    """
    built = r"C:\ff\source\obj-x86_64-pc-windows-msvc\dist\bin\firefox.exe"
    from_env = (
        os.environ.get("STEALTHFOX_E2E_BINARY"),
        os.environ.get("INVPW_BINARY_PATH"),
    )
    for cand in (*from_env, built):
        if cand and os.path.exists(cand):
            return cand
    return None
|
||||
|
||||
|
||||
@pytest.fixture
def socks5_tcp_only():
    """Provide a running ``_Socks5TcpOnly`` proxy; it is closed after the test."""
    proxy = _Socks5TcpOnly()
    yield proxy
    proxy.close()
|
||||
|
||||
|
||||
@pytest.fixture
def local_https_page():
    """A trivial localhost page (used by the no-proxy srflx test).

    Serves the same small HTML document for every GET over plain HTTP on an
    ephemeral 127.0.0.1 port and yields its URL. On teardown the server is
    stopped and its listening socket is closed, so the port is not left open
    for the rest of the session.
    """
    class H(BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200)
            self.send_header("Content-Type", "text/html")
            self.end_headers()
            self.wfile.write(b"<html><body>wrtc</body></html>")

        def log_message(self, *a):
            pass

    httpd = HTTPServer(("127.0.0.1", 0), H)
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        yield f"http://127.0.0.1:{httpd.server_address[1]}/"
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the socket.
        httpd.server_close()
|
||||
|
||||
|
||||
def _launch(**extra):
    """Build an ``InvisiblePlaywright`` with the e2e defaults, overridden by ``extra``.

    Defaults: headless, a fixed timezone, and host-address obfuscation turned
    on through ``extra_prefs``. Any keyword in ``extra`` replaces the default
    of the same name.
    """
    from invisible_playwright import InvisiblePlaywright

    options = dict(
        headless=True,
        # Fixed zone so the wrapper does NOT run timezone="auto" egress
        # discovery through the (fake) proxy — irrelevant here, we inject the
        # egress IP directly and want the launch deterministic/offline.
        timezone="America/New_York",
        extra_prefs={"media.peerconnection.ice.obfuscate_host_addresses": True},
    )
    options.update(extra)
    return InvisiblePlaywright(**options)
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_srflx_is_real_and_resolvable(local_https_page, monkeypatch):
    """No proxy needed: the egress is faked via the env. Asserts the live srflx
    is genuine (Fix A/B) and that CreepJS's resolver returns it (not blocked).

    The env vars are set through ``monkeypatch`` so they are restored after
    the test instead of leaking into every later test in the session.
    """
    binary = _e2e_binary()
    if not binary:
        pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)")
    monkeypatch.setenv("STEALTHFOX_WEBRTC_PUBLIC_IP", _FAKE_EGRESS)
    monkeypatch.setenv("STEALTHFOX_WEBRTC_DISABLE_IPV6", "1")
    with _launch(binary_path=binary) as browser:
        page = browser.new_context().new_page()
        page.goto(local_https_page, wait_until="domcontentloaded", timeout=60000)
        res = page.evaluate(_PROBE_JS)
    cands = candidates(res["candidates"])
    assert cands, "ICE produced ZERO candidates (blocked)"
    assert host_is_mdns(cands), [c["address"] for c in host_candidates(cands)]
    srflx = [c for c in srflx_candidates(cands) if c["address"] == _FAKE_EGRESS]
    assert srflx, f"no synthetic srflx with {_FAKE_EGRESS}: {res['candidates']}"
    ok, reasons = srflx_realness(srflx[0], expected_ip=_FAKE_EGRESS)
    assert ok, reasons
    # Two srflx for the same base must share ONE stable foundation (Fix B).
    assert len({c["foundation"] for c in srflx}) == 1
    assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS
|
||||
|
||||
|
||||
@pytest.mark.e2e
def test_not_blocked_behind_tcp_only_socks(socks5_tcp_only, monkeypatch):
    """Fix C sentinel: behind a TCP-only SOCKS proxy on a remote origin, ICE
    must still complete (host .local + synthetic srflx), not return zero
    candidates. Without Fix C this page is fully 'blocked'.

    The env vars are set through ``monkeypatch`` so they are restored after
    the test instead of leaking into every later test in the session.
    """
    binary = _e2e_binary()
    if not binary:
        pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)")
    monkeypatch.setenv("STEALTHFOX_WEBRTC_PUBLIC_IP", _FAKE_EGRESS)
    monkeypatch.setenv("STEALTHFOX_WEBRTC_DISABLE_IPV6", "1")
    proxy = {"server": f"socks5://127.0.0.1:{socks5_tcp_only.port}"}
    try:
        with _launch(binary_path=binary, proxy=proxy) as browser:
            page = browser.new_context().new_page()
            # remote origin loaded THROUGH the local SOCKS proxy (not localhost,
            # so no proxy-bypass) → WebRTC proxy config active → Fix C path.
            page.goto("https://example.com/", wait_until="domcontentloaded", timeout=70000)
            res = page.evaluate(_PROBE_JS)
    except Exception as exc:  # network/proxy unavailable in this environment
        pytest.skip(f"proxy/network path unavailable: {exc!r}")
    cands = candidates(res["candidates"])
    # Hard regression check: ZERO candidates means WebRTC is fully blocked behind
    # the SOCKS proxy — that's the Fix C regression this sentinel exists to catch.
    assert cands, "behind SOCKS the gather returned ZERO candidates — Fix C regressed (blocked)"
    assert host_is_mdns(cands)
    # The synthetic srflx (= fake egress) needs the remote origin to load FULLY
    # through the proxy so the WebRTC proxy config engages. That path is
    # environment-sensitive (it doesn't always engage on a datacenter CI box even
    # though host candidates gather), so treat a missing srflx as a skip, not a
    # failure — the local run validates it where the path is real.
    if not any(c["address"] == _FAKE_EGRESS for c in srflx_candidates(cands)):
        pytest.skip("synthetic srflx not engaged in this environment "
                    "(needs the remote origin fully through the proxy); validated locally")
    assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS
|
||||
125
tests/unit/test_config_public.py
Normal file
125
tests/unit/test_config_public.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
"""Unit tests for the public ``config`` helpers."""
|
||||
|
||||
import pytest
|
||||
|
||||
from invisible_playwright import (
|
||||
ensure_binary,
|
||||
get_default_args,
|
||||
get_default_stealth_prefs,
|
||||
)
|
||||
from invisible_playwright.config import get_default_stealth_prefs as _direct
|
||||
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_get_default_args_is_empty_list():
    """The baseline CLI args are an empty list, and each call hands out a new
    list so mutating one result cannot affect the next."""
    first = get_default_args()
    assert isinstance(first, list)
    assert first == []
    first.append("--foo")
    # next call must return a fresh empty list, not the mutated one
    second = get_default_args()
    assert second == []
|
||||
|
||||
|
||||
def test_get_default_stealth_prefs_random_seed_returns_dict():
    """Called without a seed, the helper returns a non-empty dict with the
    humanize toggle set to True."""
    generated = get_default_stealth_prefs()
    assert isinstance(generated, dict)
    assert len(generated) > 0
    # humanize toggle is always set explicitly
    toggle_key = "invisible_playwright.humanize"
    assert toggle_key in generated
    assert generated[toggle_key] is True
|
||||
|
||||
|
||||
def test_get_default_stealth_prefs_seed_is_deterministic():
    """Two calls with the same seed must produce equal prefs."""
    first, second = (get_default_stealth_prefs(seed=42) for _ in range(2))
    assert first == second
|
||||
|
||||
|
||||
def test_get_default_stealth_prefs_different_seeds_differ():
    """Seeds 1 and 2 must not produce the same prefs."""
    from_seed_one = get_default_stealth_prefs(seed=1)
    from_seed_two = get_default_stealth_prefs(seed=2)
    assert from_seed_one != from_seed_two
|
||||
|
||||
|
||||
def test_humanize_false_disables_prefs():
    """With humanize=False the toggle is False and no maxTime pref is emitted."""
    generated = get_default_stealth_prefs(seed=42, humanize=False)
    assert "invisible_playwright.humanize.maxTime" not in generated
    assert generated["invisible_playwright.humanize"] is False
|
||||
|
||||
|
||||
def test_humanize_default_sets_max_time_1_5():
    """With humanize=True the toggle is True and maxTime is the string "1.5"."""
    generated = get_default_stealth_prefs(seed=42, humanize=True)
    toggle = generated["invisible_playwright.humanize"]
    max_time = generated["invisible_playwright.humanize.maxTime"]
    assert toggle is True
    assert max_time == "1.5"
|
||||
|
||||
|
||||
def test_humanize_float_overrides_max_time():
    """A float passed as humanize keeps the toggle True and becomes the
    maxTime value, stored as a string."""
    generated = get_default_stealth_prefs(seed=42, humanize=3.0)
    toggle = generated["invisible_playwright.humanize"]
    max_time = generated["invisible_playwright.humanize.maxTime"]
    assert toggle is True
    assert max_time == "3.0"
|
||||
|
||||
|
||||
def test_extra_prefs_overlay_takes_precedence():
    """A key supplied through extra_prefs appears in the result with its value."""
    overlay = {"some.custom.pref": 999}
    generated = get_default_stealth_prefs(seed=42, extra_prefs=overlay)
    assert generated["some.custom.pref"] == 999
|
||||
|
||||
|
||||
def test_extra_prefs_can_override_baseline():
    """An extra_prefs entry whose key already exists in the baseline wins."""
    # Take whichever key the baseline dict yields first as the collision target.
    existing_key = next(iter(get_default_stealth_prefs(seed=42)))
    overlay = {existing_key: "OVERRIDDEN_SENTINEL"}
    merged = get_default_stealth_prefs(seed=42, extra_prefs=overlay)
    assert merged[existing_key] == "OVERRIDDEN_SENTINEL"
|
||||
|
||||
|
||||
def test_locale_argument_changes_prefs():
    """Same seed with en-US vs it-IT locale must give different prefs."""
    english = get_default_stealth_prefs(seed=42, locale="en-US")
    italian = get_default_stealth_prefs(seed=42, locale="it-IT")
    assert english != italian
|
||||
|
||||
|
||||
def test_timezone_argument_changes_prefs():
    """Same seed with two different timezones must give different prefs."""
    new_york = get_default_stealth_prefs(seed=42, timezone="America/New_York")
    europe_rome = get_default_stealth_prefs(seed=42, timezone="Europe/Rome")
    assert new_york != europe_rome
|
||||
|
||||
|
||||
def test_pin_argument_forces_specific_fields():
    """Passing a pin changes the output relative to the un-pinned build of the same seed."""
    unpinned = get_default_stealth_prefs(seed=42)
    forced = {"hardware.concurrency": 999}
    with_pin = get_default_stealth_prefs(seed=42, pin=forced)
    # Only inequality is checked here: at least one entry must differ.
    assert unpinned != with_pin
|
||||
|
||||
|
||||
def test_public_import_matches_direct_import():
    """The re-exported function and the directly imported one return equal prefs."""
    via_public = get_default_stealth_prefs(seed=42)
    via_direct = _direct(seed=42)
    assert via_public == via_direct
|
||||
|
||||
|
||||
def test_ensure_binary_is_callable_via_public_namespace():
    """The package-root ensure_binary is the very same object as the one in download."""
    # Identity check only: calling it would trigger a network download in CI.
    from invisible_playwright.download import ensure_binary as underlying

    assert ensure_binary is underlying
|
||||
18
tests/vendor/README.md
vendored
Normal file
18
tests/vendor/README.md
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Vendored detection libraries (test-only)
|
||||
|
||||
These are upstream, unmodified, MIT-licensed browser-fingerprinting / bot-detection
|
||||
libraries, vendored so the detector e2e tests run **hermetically and identically**
|
||||
on a dev box and on a GitHub runner (no external CDN at test time — Firefox
|
||||
tracking-protection blocks the openfpcdn.io CDN anyway, and we want CI offline).
|
||||
|
||||
They are served from a localhost HTTP server and loaded into the patched Firefox;
|
||||
the tests assert the REAL detectors don't flag the stealth build (BotD: `bot===false`)
|
||||
and that the fingerprint is stable (FingerprintJS: same `visitorId` across launches).
|
||||
|
||||
| File | Package | Version | Source | License |
|
||||
|---|---|---|---|---|
|
||||
| `botd-2.0.0.esm.js` | `@fingerprintjs/botd` | 2.0.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/botd@2.0.0/dist/botd.esm.js | MIT |
|
||||
| `fingerprintjs-5.2.0.umd.min.js` | `@fingerprintjs/fingerprintjs` | 5.2.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/fingerprintjs@5.2.0/dist/fp.umd.min.js | MIT |
|
||||
|
||||
Both are MIT (Copyright © FingerprintJS, Inc.). To update: download the pinned
|
||||
dist from jsdelivr, drop it here, and bump the version in the filename + this table.
|
||||
811
tests/vendor/botd-2.0.0.esm.js
vendored
Normal file
811
tests/vendor/botd-2.0.0.esm.js
vendored
Normal file
|
|
@ -0,0 +1,811 @@
|
|||
/**
|
||||
* Fingerprint BotD v2.0.0 - Copyright (c) FingerprintJS, Inc, 2025 (https://fingerprint.com)
|
||||
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
||||
*/
|
||||
|
||||
// Library version string.
const version = '2.0.0';
|
||||
|
||||
/**
 * Bot kinds reported by the detectors.
 * Specific bot products are listed first, automation technologies after them.
 *
 * A plain object (rather than a TypeScript enum) is used so that no IIFE is
 * emitted; see https://stackoverflow.com/q/47363996 for compiled-enum output.
 *
 * @readonly
 * @enum {string}
 */
const BotKind = {
    Awesomium: "awesomium",
    Cef: "cef",
    CefSharp: "cefsharp",
    CoachJS: "coachjs",
    Electron: "electron",
    FMiner: "fminer",
    Geb: "geb",
    NightmareJS: "nightmarejs",
    Phantomas: "phantomas",
    PhantomJS: "phantomjs",
    Rhino: "rhino",
    Selenium: "selenium",
    Sequentum: "sequentum",
    SlimerJS: "slimerjs",
    WebDriverIO: "webdriverio",
    WebDriver: "webdriver",
    HeadlessChrome: "headless_chrome",
    Unknown: "unknown",
};
|
||||
/**
 * Error raised by signal sources; carries a numeric `state` code next to the
 * message.
 */
class BotdError extends Error {
    /**
     * @param {number} state - State code stored on the instance as `state`.
     * @param {string} message - Human-readable message passed on to `Error`.
     */
    constructor(state, message) {
        super(message);
        Object.assign(this, { state, name: 'BotdError' });
        // Re-pin the prototype explicitly so `instanceof BotdError` holds.
        Object.setPrototypeOf(this, BotdError.prototype);
    }
}
|
||||
|
||||
/**
 * Runs every detector against the collected components.
 *
 * A detector returning a string marks a bot of that kind; any other truthy
 * return marks a bot of kind `BotKind.Unknown`; a falsy return means "not a bot".
 *
 * @param {object} components - Collected components, passed to each detector.
 * @param {object} detectors - Map of detector name to detector function.
 * @returns {Array} `[detections, finalDetection]`: the per-detector results and
 *   the result of the last detector (in iteration order) that reported a bot,
 *   or `{ bot: false }` when none did.
 */
function detect(components, detectors) {
    const perDetector = {};
    let verdict = { bot: false };
    for (const name in detectors) {
        const runDetector = detectors[name];
        const outcome = runDetector(components);
        let entry;
        if (typeof outcome === 'string') {
            entry = { bot: true, botKind: outcome };
        }
        else if (outcome) {
            entry = { bot: true, botKind: BotKind.Unknown };
        }
        else {
            entry = { bot: false };
        }
        perDetector[name] = entry;
        if (entry.bot) {
            // Later positives overwrite earlier ones.
            verdict = entry;
        }
    }
    return [perDetector, verdict];
}
|
||||
/**
 * Calls every source and records either its value or a description of the
 * failure.
 *
 * All sources are started together and awaited with `Promise.all`. A
 * successful source yields `{ value, state: 0 }`. A thrown `BotdError` yields
 * `{ state: error.state, error }`; anything else thrown yields
 * `{ state: -3, error }` (State.UnexpectedBehaviour).
 *
 * @param {object} sources - Map of component name to (possibly async) getter.
 * @returns {Promise<object>} Map of component name to its recorded result.
 */
async function collect(sources) {
    const components = {};
    const readSource = async (key) => {
        const getter = sources[key];
        try {
            const value = await getter();
            components[key] = { value, state: 0 /* State.Success */ };
        }
        catch (error) {
            const text = error instanceof Error ? `${error.name}: ${error.message}` : String(error);
            const state = error instanceof BotdError ? error.state : -3 /* State.UnexpectedBehaviour */;
            components[key] = { state, error: text };
        }
    };
    await Promise.all(Object.keys(sources).map((key) => readSource(key)));
    return components;
}
|
||||
|
||||
/**
 * Looks for automation markers in the collected `appVersion` string.
 *
 * @returns {string|boolean|undefined} A bot kind on a match, `false` when the
 *   component was not collected successfully, otherwise `undefined`.
 */
function detectAppVersion({ appVersion }) {
    if (appVersion.state !== 0 /* State.Success */) {
        return false;
    }
    const text = appVersion.value;
    // Checked in this order; the first matching pattern wins.
    const rules = [
        [/headless/i, BotKind.HeadlessChrome],
        [/electron/i, BotKind.Electron],
        [/slimerjs/i, BotKind.SlimerJS],
    ];
    for (const [pattern, kind] of rules) {
        if (pattern.test(text)) {
            return kind;
        }
    }
    return undefined;
}
|
||||
|
||||
/**
 * Membership test built on `indexOf`.
 *
 * @returns {boolean} `true` when `arr.indexOf(value)` finds the value.
 */
function arrayIncludes(arr, value) {
    const position = arr.indexOf(value);
    return position >= 0;
}
|
||||
/**
 * Substring test built on `indexOf`.
 *
 * @returns {boolean} `true` when `str.indexOf(value)` finds the value.
 */
function strIncludes(str, value) {
    const position = str.indexOf(value);
    return position >= 0;
}
|
||||
/**
 * Returns the first element for which `callback` is truthy.
 *
 * Delegates to the object's own `find` when it has one; otherwise walks the
 * indices `0..length-1` manually.
 *
 * @returns {*} The matching element, or `undefined` when nothing matches.
 */
function arrayFind(array, callback) {
    if (!('find' in array)) {
        let index = 0;
        while (index < array.length) {
            const candidate = array[index];
            if (callback(candidate, index, array)) {
                return candidate;
            }
            index++;
        }
        return undefined;
    }
    return array.find(callback);
}
|
||||
|
||||
/**
 * Lists the own property names of `obj` via `Object.getOwnPropertyNames`.
 *
 * @returns {string[]} The own property names.
 */
function getObjectProps(obj) {
    const ownNames = Object.getOwnPropertyNames(obj);
    return ownNames;
}
|
||||
/**
 * Tells whether `arr` contains any of the given keys.
 *
 * String keys are matched by exact membership; every other key is treated as
 * a pattern and matched through its `test` method against each element.
 *
 * @param {Array} arr - Values to search.
 * @param {...(string|RegExp)} keys - Exact names or patterns to look for.
 * @returns {boolean} `true` as soon as one key matches, otherwise `false`.
 */
function includes(arr, ...keys) {
    return keys.some((key) => {
        if (typeof key === 'string') {
            return arrayIncludes(arr, key);
        }
        return arrayFind(arr, (value) => key.test(value)) != null;
    });
}
|
||||
/**
 * Counts how many entries of `values` are truthy.
 *
 * @param {Array} values - Values to inspect.
 * @returns {number} Number of truthy entries.
 */
function countTruthy(values) {
    let total = 0;
    for (const value of values) {
        if (value) {
            total += 1;
        }
    }
    return total;
}
|
||||
|
||||
/**
 * Flags Selenium when the collected `documentElementKeys` contain
 * `selenium`, `webdriver` or `driver`.
 *
 * @returns {string|boolean|undefined} `BotKind.Selenium` on a match, `false`
 *   when the component was not collected successfully, otherwise `undefined`.
 */
function detectDocumentAttributes({ documentElementKeys }) {
    if (documentElementKeys.state !== 0 /* State.Success */) {
        return false;
    }
    const flagged = includes(documentElementKeys.value, 'selenium', 'webdriver', 'driver');
    return flagged ? BotKind.Selenium : undefined;
}
|
||||
|
||||
/**
 * Flags PhantomJS when the collected error stack trace mentions it.
 *
 * @returns {string|boolean|undefined} `BotKind.PhantomJS` on a match, `false`
 *   when the component was not collected successfully, otherwise `undefined`.
 */
function detectErrorTrace({ errorTrace }) {
    if (errorTrace.state !== 0 /* State.Success */) {
        return false;
    }
    const mentionsPhantom = /PhantomJS/i.test(errorTrace.value);
    return mentionsPhantom ? BotKind.PhantomJS : undefined;
}
|
||||
|
||||
/**
 * Checks that the length of `eval.toString()` fits the detected browser.
 *
 * Lengths 37, 39 and 33 are each accepted only for a fixed set of
 * engines/browsers; any other length is never flagged.
 *
 * @returns {boolean|undefined} `true` on an inconsistency, `false` otherwise,
 *   `undefined` when any needed component was not collected successfully.
 */
function detectEvalLengthInconsistency({ evalLength, browserKind, browserEngineKind, }) {
    const inputs = [evalLength, browserKind, browserEngineKind];
    if (inputs.some((component) => component.state !== 0 /* State.Success */)) {
        return;
    }
    const length = evalLength.value;
    const engine = browserEngineKind.value;
    if (engine === "unknown" /* BrowserEngineKind.Unknown */) {
        return false;
    }
    switch (length) {
        case 37:
            return !arrayIncludes(["webkit" /* BrowserEngineKind.Webkit */, "gecko" /* BrowserEngineKind.Gecko */], engine);
        case 39:
            return !arrayIncludes(["internet_explorer" /* BrowserKind.IE */], browserKind.value);
        case 33:
            return !arrayIncludes(["chromium" /* BrowserEngineKind.Chromium */], engine);
        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Flags PhantomJS when the `functionBind` component ended in state -2
 * (State.NotFunction).
 *
 * @returns {string|undefined} `BotKind.PhantomJS` or `undefined`.
 */
function detectFunctionBind({ functionBind }) {
    const bindMissing = functionBind.state === -2 /* State.NotFunction */;
    return bindMissing ? BotKind.PhantomJS : undefined;
}
|
||||
|
||||
/**
 * Flags headless Chrome when the languages component was collected
 * successfully but is empty.
 *
 * @returns {string|undefined} `BotKind.HeadlessChrome` or `undefined`.
 */
function detectLanguagesLengthInconsistency({ languages }) {
    if (languages.state !== 0 /* State.Success */) {
        return undefined;
    }
    return languages.value.length === 0 ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Flags an unknown bot when the mime-type consistency check was collected
 * successfully and came back falsy.
 *
 * @returns {string|undefined} `BotKind.Unknown` or `undefined`.
 */
function detectMimeTypesConsistent({ mimeTypesConsistent }) {
    const { state, value } = mimeTypesConsistent;
    if (state !== 0 /* State.Success */ || value) {
        return undefined;
    }
    return BotKind.Unknown;
}
|
||||
|
||||
/**
 * Flags headless Chrome from the notification-permission signal. Only
 * evaluated when the browser kind was collected and equals `"chrome"`.
 *
 * @returns {string|boolean|undefined} `BotKind.HeadlessChrome` when the signal
 *   was collected and is truthy, `false` when the browser is not known to be
 *   Chrome, otherwise `undefined`.
 */
function detectNotificationPermissions({ notificationPermissions, browserKind, }) {
    const isChrome = browserKind.state === 0 /* State.Success */ && browserKind.value === "chrome" /* BrowserKind.Chrome */;
    if (!isChrome) {
        return false;
    }
    const flagged = notificationPermissions.state === 0 /* State.Success */ && notificationPermissions.value;
    return flagged ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Flags headless Chrome when the `pluginsArray` component was collected
 * successfully and is falsy.
 *
 * @returns {string|undefined} `BotKind.HeadlessChrome` or `undefined`.
 */
function detectPluginsArray({ pluginsArray }) {
    const { state, value } = pluginsArray;
    const suspicious = state === 0 /* State.Success */ && !value;
    return suspicious ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Flags headless Chrome when a non-Android Chrome on the Chromium engine
 * reports zero plugins.
 *
 * @returns {string|undefined} `BotKind.HeadlessChrome` on a match; `undefined`
 *   when any needed component failed, the browser is out of scope, or the
 *   plugin count is not zero.
 */
function detectPluginsLengthInconsistency({ pluginsLength, android, browserKind, browserEngineKind, }) {
    const inputs = [pluginsLength, android, browserKind, browserEngineKind];
    if (inputs.some((component) => component.state !== 0 /* State.Success */)) {
        return;
    }
    const inScope = browserKind.value === "chrome" /* BrowserKind.Chrome */ &&
        !android.value &&
        browserEngineKind.value === "chromium" /* BrowserEngineKind.Chromium */;
    if (!inScope) {
        return;
    }
    return pluginsLength.value === 0 ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Flags Electron from the collected `window.process` value: either its `type`
 * is `'renderer'` or `versions.electron` is present.
 *
 * @returns {string|boolean|undefined} `BotKind.Electron` on a match, `false`
 *   when the component was not collected successfully, otherwise `undefined`.
 */
function detectProcess({ process }) {
    if (process.state !== 0 /* State.Success */) {
        return false;
    }
    const info = process.value;
    if (info.type === 'renderer') {
        return BotKind.Electron;
    }
    const versions = info.versions;
    if (versions !== null && versions !== undefined && versions.electron != null) {
        return BotKind.Electron;
    }
    return undefined;
}
|
||||
|
||||
/**
 * Flags an unknown bot when a Chrome, Safari, Opera or WeChat browser reports
 * a `productSub` other than `'20030107'`.
 *
 * @returns {string|boolean|undefined} `BotKind.Unknown` on a mismatch, `false`
 *   when a needed component was not collected successfully, otherwise
 *   `undefined`.
 */
function detectProductSub({ productSub, browserKind }) {
    if (productSub.state !== 0 /* State.Success */ || browserKind.state !== 0 /* State.Success */) {
        return false;
    }
    const checkedBrowsers = [
        "chrome" /* BrowserKind.Chrome */,
        "safari" /* BrowserKind.Safari */,
        "opera" /* BrowserKind.Opera */,
        "wechat" /* BrowserKind.WeChat */,
    ];
    const isChecked = checkedBrowsers.indexOf(browserKind.value) !== -1;
    if (isChecked && productSub.value !== '20030107') {
        return BotKind.Unknown;
    }
    return undefined;
}
|
||||
|
||||
/**
 * Looks for automation markers in the collected user-agent string.
 *
 * @returns {string|boolean|undefined} A bot kind on a match, `false` when the
 *   component was not collected successfully, otherwise `undefined`.
 */
function detectUserAgent({ userAgent }) {
    if (userAgent.state !== 0 /* State.Success */) {
        return false;
    }
    const text = userAgent.value;
    // Checked in this order; the first matching pattern wins.
    const rules = [
        [/PhantomJS/i, BotKind.PhantomJS],
        [/Headless/i, BotKind.HeadlessChrome],
        [/Electron/i, BotKind.Electron],
        [/slimerjs/i, BotKind.SlimerJS],
    ];
    for (const [pattern, kind] of rules) {
        if (pattern.test(text)) {
            return kind;
        }
    }
    return undefined;
}
|
||||
|
||||
/**
 * Reports `BotKind.HeadlessChrome` when the `webDriver` component was
 * collected successfully and is truthy.
 *
 * @returns {string|undefined} `BotKind.HeadlessChrome` or `undefined`.
 */
function detectWebDriver({ webDriver }) {
    const { state, value } = webDriver;
    const automated = state === 0 /* State.Success */ && value;
    return automated ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Flags headless Chrome when WebGL reports vendor `'Brian Paul'` together with
 * renderer `'Mesa OffScreen'`.
 *
 * @returns {string|undefined} `BotKind.HeadlessChrome` or `undefined`.
 */
function detectWebGL({ webGL }) {
    if (webGL.state !== 0 /* State.Success */) {
        return undefined;
    }
    const { vendor, renderer } = webGL.value;
    // Loose equality is kept as in the original comparison.
    const isOffscreenMesa = vendor == 'Brian Paul' && renderer == 'Mesa OffScreen';
    return isOffscreenMesa ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Flags Sequentum when the collected `window.external` string mentions it.
 *
 * @returns {string|boolean|undefined} `BotKind.Sequentum` on a match, `false`
 *   when the component was not collected successfully, otherwise `undefined`.
 */
function detectWindowExternal({ windowExternal }) {
    if (windowExternal.state !== 0 /* State.Success */) {
        return false;
    }
    const mentionsSequentum = /Sequentum/i.test(windowExternal.value);
    return mentionsSequentum ? BotKind.Sequentum : undefined;
}
|
||||
|
||||
/**
 * Flags headless Chrome when a focused document reports a 0x0 outer window.
 *
 * @returns {string|boolean|undefined} `BotKind.HeadlessChrome` on a match,
 *   `false` when a needed component was not collected successfully, otherwise
 *   `undefined`.
 */
function detectWindowSize({ windowSize, documentFocus }) {
    const collected = windowSize.state === 0 /* State.Success */ && documentFocus.state === 0 /* State.Success */;
    if (!collected) {
        return false;
    }
    const { outerWidth, outerHeight } = windowSize.value;
    // A page opened in a new tab that is not focused right away also has a 0x0
    // outer size, so unfocused documents are not judged.
    if (!documentFocus.value) {
        return;
    }
    const isZeroSized = outerWidth === 0 && outerHeight === 0;
    return isZeroSized ? BotKind.HeadlessChrome : undefined;
}
|
||||
|
||||
/**
 * Returns the first key of the collected `distinctiveProps` map whose value
 * is truthy.
 *
 * @returns {string|boolean|undefined} That key, `false` when the component was
 *   not collected successfully, or `undefined` when no entry is truthy.
 */
function detectDistinctiveProperties({ distinctiveProps }) {
    if (distinctiveProps.state !== 0 /* State.Success */) {
        return false;
    }
    const flags = distinctiveProps.value;
    for (const botName in flags) {
        if (flags[botName]) {
            return botName;
        }
    }
    return undefined;
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
|
||||
// Registry of detector functions keyed by name. The key order is kept as-is.
const detectors = {
    detectAppVersion: detectAppVersion,
    detectDocumentAttributes: detectDocumentAttributes,
    detectErrorTrace: detectErrorTrace,
    detectEvalLengthInconsistency: detectEvalLengthInconsistency,
    detectFunctionBind: detectFunctionBind,
    detectLanguagesLengthInconsistency: detectLanguagesLengthInconsistency,
    detectNotificationPermissions: detectNotificationPermissions,
    detectPluginsArray: detectPluginsArray,
    detectPluginsLengthInconsistency: detectPluginsLengthInconsistency,
    detectProcess: detectProcess,
    detectUserAgent: detectUserAgent,
    detectWebDriver: detectWebDriver,
    detectWebGL: detectWebGL,
    detectWindowExternal: detectWindowExternal,
    detectWindowSize: detectWindowSize,
    detectMimeTypesConsistent: detectMimeTypesConsistent,
    detectProductSub: detectProductSub,
    detectDistinctiveProperties: detectDistinctiveProperties,
};
|
||||
|
||||
/**
 * Reads `navigator.appVersion`.
 *
 * @returns {string} The value of `navigator.appVersion`.
 * @throws {BotdError} State -1 (Undefined) when the value is `null` or
 *   `undefined`.
 */
function getAppVersion() {
    const { appVersion } = navigator;
    if (appVersion === undefined || appVersion === null) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.appVersion is undefined');
    }
    return appVersion;
}
|
||||
|
||||
/**
 * Lists the attribute names present on `document.documentElement`.
 *
 * @returns {string[]} Result of `documentElement.getAttributeNames()`.
 * @throws {BotdError} State -1 (Undefined) when `document.documentElement` is
 *   undefined; state -2 (NotFunction) when `getAttributeNames` is not callable.
 */
function getDocumentElementKeys() {
    const root = document.documentElement;
    if (root === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'document.documentElement is undefined');
    }
    if (typeof root.getAttributeNames !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'document.documentElement.getAttributeNames is not a function');
    }
    return root.getAttributeNames();
}
|
||||
|
||||
/**
 * Provokes an error on purpose and returns its stack trace as a string.
 *
 * @returns {string} The stack of the provoked error.
 * @throws {BotdError} State -3 (UnexpectedBehaviour) when nothing was thrown,
 *   the thrown value is not an `Error`, or it carries no stack.
 */
function getErrorTrace() {
    let provoked;
    try {
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore
        null[0]();
    }
    catch (error) {
        provoked = error;
    }
    if (provoked instanceof Error && provoked['stack'] != null) {
        return provoked.stack.toString();
    }
    throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'errorTrace signal unexpected behaviour');
}
|
||||
|
||||
/**
 * Measures the length of the string form of the built-in `eval` function.
 *
 * @returns {number} `eval.toString().length`.
 */
function getEvalLength() {
    const evalSource = eval.toString();
    return evalSource.length;
}
|
||||
|
||||
/**
 * Returns the string form of `Function.prototype.bind`.
 *
 * @returns {string} `Function.prototype.bind.toString()`.
 * @throws {BotdError} State -2 (NotFunction) when `bind` is undefined.
 */
function getFunctionBind() {
    const { bind } = Function.prototype;
    if (bind === undefined) {
        throw new BotdError(-2 /* State.NotFunction */, 'Function.prototype.bind is undefined');
    }
    return bind.toString();
}
|
||||
|
||||
/**
 * Guesses the browser engine by counting engine-specific globals and
 * navigator properties.
 *
 * The Chromium check comes from research in October 2020 (tested on Chromium
 * 42-86). Engines are tried in the order Chromium, WebKit, Gecko.
 *
 * @returns {string} `"chromium"`, `"webkit"`, `"gecko"` or `"unknown"`.
 */
function getBrowserEngineKind() {
    const w = window;
    const n = navigator;
    const chromiumSignals = [
        'webkitPersistentStorage' in n,
        'webkitTemporaryStorage' in n,
        n.vendor.indexOf('Google') === 0,
        'webkitResolveLocalFileSystemURL' in w,
        'BatteryManager' in w,
        'webkitMediaStream' in w,
        'webkitSpeechGrammar' in w,
    ];
    if (countTruthy(chromiumSignals) >= 5) {
        return "chromium" /* BrowserEngineKind.Chromium */;
    }
    const webkitSignals = [
        'ApplePayError' in w,
        'CSSPrimitiveValue' in w,
        'Counter' in w,
        n.vendor.indexOf('Apple') === 0,
        'getStorageUpdates' in n,
        'WebKitMediaKeys' in w,
    ];
    if (countTruthy(webkitSignals) >= 4) {
        return "webkit" /* BrowserEngineKind.Webkit */;
    }
    // Fall back to an empty object when there is no root element or style.
    const root = document.documentElement;
    const rootStyle = root === null || root === undefined ? undefined : root.style;
    const styleHolder = rootStyle !== null && rootStyle !== undefined ? rootStyle : {};
    const geckoSignals = [
        'buildID' in navigator,
        'MozAppearance' in styleHolder,
        'onmozfullscreenchange' in w,
        'mozInnerScreenX' in w,
        'CSSMozDocumentRule' in w,
        'CanvasCaptureMediaStream' in w,
    ];
    if (countTruthy(geckoSignals) >= 4) {
        return "gecko" /* BrowserEngineKind.Gecko */;
    }
    return "unknown" /* BrowserEngineKind.Unknown */;
}
|
||||
/**
 * Classifies the browser from substrings of the lower-cased user agent.
 *
 * @returns {string} `"edge"`, `"internet_explorer"`, `"wechat"`, `"firefox"`,
 *   `"opera"`, `"chrome"`, `"safari"` or `"unknown"`.
 */
function getBrowserKind() {
    const rawUserAgent = navigator.userAgent;
    const userAgent = rawUserAgent === null || rawUserAgent === undefined ? undefined : rawUserAgent.toLowerCase();
    // Checked in this order; the first rule with a matching marker wins.
    const rules = [
        ["edge" /* BrowserKind.Edge */, ['edg/']],
        ["internet_explorer" /* BrowserKind.IE */, ['trident', 'msie']],
        ["wechat" /* BrowserKind.WeChat */, ['wechat']],
        ["firefox" /* BrowserKind.Firefox */, ['firefox']],
        ["opera" /* BrowserKind.Opera */, ['opera', 'opr']],
        ["chrome" /* BrowserKind.Chrome */, ['chrome']],
        ["safari" /* BrowserKind.Safari */, ['safari']],
    ];
    for (const [kind, markers] of rules) {
        if (markers.some((marker) => strIncludes(userAgent, marker))) {
            return kind;
        }
    }
    return "unknown" /* BrowserKind.Unknown */;
}
|
||||
// Source: https://github.com/fingerprintjs/fingerprintjs/blob/master/src/utils/browser.ts#L223
|
||||
/**
 * Guesses whether the browser runs on Android, using engine-specific signals.
 *
 * Chrome removes every "Android" word from `navigator` when the desktop
 * version is requested, while Firefox keeps it in `navigator.appVersion`.
 *
 * @returns {boolean} `true` when at least two signals of the detected engine
 *   match; always `false` for engines other than Chromium and Gecko.
 */
function isAndroid() {
    const engine = getBrowserEngineKind();
    const w = window;
    const n = navigator;
    const c = 'connection';
    if (engine === "chromium" /* BrowserEngineKind.Chromium */) {
        const chromiumSignals = [
            !('SharedWorker' in w),
            // `typechange` is deprecated but still present on Android (tested on Chrome Mobile 117).
            // Removal proposal: https://bugs.chromium.org/p/chromium/issues/detail?id=699892
            // This expression is also true on ChromeOS, hence the other signals.
            n[c] && 'ontypechange' in n[c],
            !('sinkId' in new Audio()),
        ];
        return countTruthy(chromiumSignals) >= 2;
    }
    if (engine === "gecko" /* BrowserEngineKind.Gecko */) {
        const geckoSignals = ['onorientationchange' in w, 'orientation' in w, /android/i.test(n.appVersion)];
        return countTruthy(geckoSignals) >= 2;
    }
    // Only these two engines are handled; the Android 4.1 stock browser is
    // deliberately not detected.
    return false;
}
|
||||
/**
 * Reports whether the document currently has focus.
 *
 * @returns {boolean} Result of `document.hasFocus()`, or `false` when that
 *   method is undefined.
 */
function getDocumentFocus() {
    return document.hasFocus === undefined ? false : document.hasFocus();
}
|
||||
/**
 * Tells Chromium 86+ apart from older versions by counting four signals.
 *
 * Checked in Chrome 85 vs Chrome 86 on desktop and Android, and in macOS
 * Chrome 128 and Android Chrome 127.
 *
 * @returns {boolean} `true` when at least three of the four signals match.
 */
function isChromium86OrNewer() {
    const w = window;
    const signals = [
        !('MediaSettingsRange' in w),
        'RTCEncodedAudioFrame' in w,
        '' + w.Intl === '[object Intl]',
        '' + w.Reflect === '[object Reflect]',
    ];
    return countTruthy(signals) >= 3;
}
|
||||
|
||||
/**
 * Collects the browser's language settings as a list of string lists.
 *
 * The first entry (when available) wraps the single UI language; the second
 * holds `navigator.languages`, split on commas when it is a string.
 *
 * @returns {Array<string[]>} Zero, one or two language lists.
 */
function getLanguages() {
    const n = navigator;
    const collected = [];
    const primary = n.language || n.userLanguage || n.browserLanguage || n.systemLanguage;
    if (primary !== undefined) {
        collected.push([primary]);
    }
    const listed = n.languages;
    if (Array.isArray(listed)) {
        const engine = getBrowserEngineKind();
        // Starting from Chromium 86, `navigator.languages` in Incognito mode holds
        // only the value of `navigator.language`, so it is skipped there.
        if (engine !== "chromium" /* BrowserEngineKind.Chromium */ || !isChromium86OrNewer()) {
            collected.push(n.languages);
        }
    }
    else if (typeof listed === 'string') {
        if (listed) {
            collected.push(listed.split(','));
        }
    }
    return collected;
}
|
||||
|
||||
/**
 * Checks that `navigator.mimeTypes` and each of its entries have the native
 * prototypes (`MimeTypeArray.prototype` and `MimeType.prototype`).
 *
 * @returns {boolean} `true` when every prototype matches.
 * @throws {BotdError} State -1 (Undefined) when `navigator.mimeTypes` is
 *   undefined.
 */
function areMimeTypesConsistent() {
    const { mimeTypes } = navigator;
    if (mimeTypes === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.mimeTypes is undefined');
    }
    if (Object.getPrototypeOf(mimeTypes) !== MimeTypeArray.prototype) {
        return false;
    }
    for (let index = 0; index < mimeTypes.length; index++) {
        if (Object.getPrototypeOf(mimeTypes[index]) !== MimeType.prototype) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
/**
 * Compares `Notification.permission` with the Permissions API answer for
 * `notifications`.
 *
 * @returns {Promise<boolean>} `true` when `Notification.permission` is
 *   `'denied'` while the queried permission state is `'prompt'`.
 * @throws {BotdError} State -1 (Undefined) when `window.Notification` or
 *   `navigator.permissions` is undefined; state -2 (NotFunction) when
 *   `permissions.query` is not callable; state -3 (UnexpectedBehaviour) when
 *   the query or the comparison throws.
 */
async function getNotificationPermissions() {
    if (window.Notification === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'window.Notification is undefined');
    }
    const { permissions } = navigator;
    if (permissions === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.permissions is undefined');
    }
    if (typeof permissions.query !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'navigator.permissions.query is not a function');
    }
    try {
        const queried = await permissions.query({ name: 'notifications' });
        const deniedByApi = window.Notification.permission === 'denied';
        return deniedByApi && queried.state === 'prompt';
    }
    catch (e) {
        throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'notificationPermissions signal unexpected behaviour');
    }
}
|
||||
|
||||
/**
 * Checks that `navigator.plugins` is an instance of `PluginArray`.
 *
 * @returns {boolean} Result of the `instanceof` check.
 * @throws {BotdError} State -1 (Undefined) when `navigator.plugins` or
 *   `window.PluginArray` is undefined.
 */
function getPluginsArray() {
    const { plugins } = navigator;
    if (plugins === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.plugins is undefined');
    }
    if (window.PluginArray === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'window.PluginArray is undefined');
    }
    return plugins instanceof PluginArray;
}
|
||||
|
||||
/**
 * Reads the number of entries in `navigator.plugins`.
 *
 * @returns {number} `navigator.plugins.length`.
 * @throws {BotdError} State -1 (Undefined) when `navigator.plugins` is
 *   undefined; state -3 (UnexpectedBehaviour) when its `length` is undefined.
 */
function getPluginsLength() {
    const { plugins } = navigator;
    if (plugins === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.plugins is undefined');
    }
    const count = plugins.length;
    if (count === undefined) {
        throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'navigator.plugins.length is undefined');
    }
    return count;
}
|
||||
|
||||
/**
 * Reads `window.process`.
 *
 * @returns {object|null} The value of `window.process`.
 * @throws {BotdError} State -1 (Undefined) when it is undefined; state -3
 *   (UnexpectedBehaviour) when it is truthy but not an object.
 */
function getProcess() {
    const errorPrefix = 'window.process is';
    const found = window.process;
    if (found === undefined) {
        throw new BotdError(-1 /* State.Undefined */, `${errorPrefix} undefined`);
    }
    const isTruthyNonObject = found && typeof found !== 'object';
    if (isTruthyNonObject) {
        throw new BotdError(-3 /* State.UnexpectedBehaviour */, `${errorPrefix} not an object`);
    }
    return found;
}
|
||||
|
||||
/**
 * Reads `navigator.productSub`.
 *
 * @returns {string} The value of `navigator.productSub`.
 * @throws {BotdError} State -1 (Undefined) when the value is undefined.
 */
function getProductSub() {
    const value = navigator.productSub;
    if (value !== undefined) {
        return value;
    }
    throw new BotdError(-1 /* State.Undefined */, 'navigator.productSub is undefined');
}
|
||||
|
||||
/**
 * Reads `navigator.connection.rtt`.
 *
 * @returns {number} The value of `navigator.connection.rtt`.
 * @throws {BotdError} State -1 (Undefined) when `navigator.connection` or its
 *   `rtt` is undefined.
 */
function getRTT() {
    const { connection } = navigator;
    if (connection === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.connection is undefined');
    }
    const { rtt } = connection;
    if (rtt === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.connection.rtt is undefined');
    }
    return rtt;
}
|
||||
|
||||
/**
 * Reads `navigator.userAgent`.
 *
 * @returns {string} The value of `navigator.userAgent`.
 */
function getUserAgent() {
    const { userAgent } = navigator;
    return userAgent;
}
|
||||
|
||||
/**
 * Reads `navigator.webdriver`.
 *
 * @returns {boolean} The value of `navigator.webdriver`.
 * @throws {BotdError} State -1 (Undefined) when the value is `null` or
 *   `undefined`.
 */
function getWebDriver() {
    const { webdriver } = navigator;
    if (webdriver === undefined || webdriver === null) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.webdriver is undefined');
    }
    return webdriver;
}
|
||||
|
||||
/**
 * Reads the WebGL `VENDOR` and `RENDERER` parameters from a fresh canvas.
 *
 * @returns {{vendor: *, renderer: *}} The two parameter values.
 * @throws {BotdError} State -2 (NotFunction) when `getContext` or
 *   `getParameter` is not callable; state -4 (Null) when no `webgl` context
 *   is returned.
 */
function getWebGL() {
    const canvas = document.createElement('canvas');
    if (typeof canvas.getContext !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'HTMLCanvasElement.getContext is not a function');
    }
    const gl = canvas.getContext('webgl');
    if (gl === null) {
        throw new BotdError(-4 /* State.Null */, 'WebGLRenderingContext is null');
    }
    if (typeof gl.getParameter !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'WebGLRenderingContext.getParameter is not a function');
    }
    const vendor = gl.getParameter(gl.VENDOR);
    const renderer = gl.getParameter(gl.RENDERER);
    return { vendor, renderer };
}
|
||||
|
||||
/**
 * Returns the string form of `window.external`.
 *
 * @returns {string} Result of `window.external.toString()`.
 * @throws {BotdError} State -1 (Undefined) when `window.external` is
 *   undefined; state -2 (NotFunction) when its `toString` is not callable.
 */
function getWindowExternal() {
    const { external } = window;
    if (external === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'window.external is undefined');
    }
    if (typeof external.toString !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'window.external.toString is not a function');
    }
    return external.toString();
}
|
||||
|
||||
/**
 * Snapshots the outer and inner dimensions of `window`.
 *
 * @returns {{outerWidth: number, outerHeight: number, innerWidth: number, innerHeight: number}}
 *   The four dimension properties read from `window`.
 */
function getWindowSize() {
    const { outerWidth, outerHeight, innerWidth, innerHeight } = window;
    return { outerWidth, outerHeight, innerWidth, innerHeight };
}
|
||||
|
||||
/**
 * Looks for property names on `window` and `window.document` that are
 * characteristic of specific bots and automation tools.
 *
 * @returns {object} Map of bot kind to `true` when any of its characteristic
 *   property names is an own property of `window` or `window.document`.
 */
function checkDistinctiveProperties() {
    // Order matters: specific bot products first, automation technologies after.
    const signatures = {
        [BotKind.Awesomium]: { window: ['awesomium'] },
        [BotKind.Cef]: { window: ['RunPerfTest'] },
        [BotKind.CefSharp]: { window: ['CefSharp'] },
        [BotKind.CoachJS]: { window: ['emit'] },
        [BotKind.FMiner]: { window: ['fmget_targets'] },
        [BotKind.Geb]: { window: ['geb'] },
        [BotKind.NightmareJS]: { window: ['__nightmare', 'nightmare'] },
        [BotKind.Phantomas]: { window: ['__phantomas'] },
        [BotKind.PhantomJS]: { window: ['callPhantom', '_phantom'] },
        [BotKind.Rhino]: { window: ['spawn'] },
        [BotKind.Selenium]: {
            window: ['_Selenium_IDE_Recorder', '_selenium', 'calledSelenium', /^([a-z]){3}_.*_(Array|Promise|Symbol)$/],
            document: ['__selenium_evaluate', 'selenium-evaluate', '__selenium_unwrapped'],
        },
        [BotKind.WebDriverIO]: { window: ['wdioElectron'] },
        [BotKind.WebDriver]: {
            window: [
                'webdriver',
                '__webdriverFunc',
                '__lastWatirAlert',
                '__lastWatirConfirm',
                '__lastWatirPrompt',
                '_WEBDRIVER_ELEM_CACHE',
                'ChromeDriverw',
            ],
            // '__webdriver_script_fn' appears twice; the list is kept verbatim.
            document: [
                '__webdriver_script_fn',
                '__driver_evaluate',
                '__webdriver_evaluate',
                '__fxdriver_evaluate',
                '__driver_unwrapped',
                '__webdriver_unwrapped',
                '__fxdriver_unwrapped',
                '__webdriver_script_fn',
                '__webdriver_script_func',
                '__webdriver_script_function',
                '$cdc_asdjflasutopfhvcZLmcf',
                '$cdc_asdjflasutopfhvcZLmcfl_',
                '$chrome_asyncScriptInfo',
                '__$webdriverAsyncExecutor',
            ],
        },
        [BotKind.HeadlessChrome]: { window: ['domAutomation', 'domAutomationController'] },
    };
    const windowProps = getObjectProps(window);
    const documentProps = window.document !== undefined ? getObjectProps(window.document) : [];
    const result = {};
    for (const botName in signatures) {
        const { window: windowKeys, document: documentKeys } = signatures[botName];
        const windowContains = windowKeys !== undefined && includes(windowProps, ...windowKeys);
        const documentContains = documentKeys !== undefined &&
            documentProps.length > 0 &&
            includes(documentProps, ...documentKeys);
        result[botName] = windowContains || documentContains;
    }
    return result;
}
|
||||
|
||||
// Registry of signal getters keyed by component name. The key order is kept as-is.
const sources = {
    android: isAndroid,
    browserKind: getBrowserKind,
    browserEngineKind: getBrowserEngineKind,
    documentFocus: getDocumentFocus,
    userAgent: getUserAgent,
    appVersion: getAppVersion,
    rtt: getRTT,
    windowSize: getWindowSize,
    pluginsLength: getPluginsLength,
    pluginsArray: getPluginsArray,
    errorTrace: getErrorTrace,
    productSub: getProductSub,
    windowExternal: getWindowExternal,
    mimeTypesConsistent: areMimeTypesConsistent,
    evalLength: getEvalLength,
    webGL: getWebGL,
    webDriver: getWebDriver,
    languages: getLanguages,
    notificationPermissions: getNotificationPermissions,
    documentElementKeys: getDocumentElementKeys,
    functionBind: getFunctionBind,
    process: getProcess,
    distinctiveProps: checkDistinctiveProperties,
};
|
||||
|
||||
/**
 * Bot detector: collects components from the registered sources, then runs
 * the registered detectors over them.
 *
 * @class
 * @implements {BotDetectorInterface}
 */
class BotDetector {
    constructor() {
        // Both stay undefined until collect() / detect() have run.
        this.components = undefined;
        this.detections = undefined;
    }
    /**
     * @returns {object|undefined} Components stored by the last `collect()`.
     */
    getComponents() {
        return this.components;
    }
    /**
     * @returns {object|undefined} Per-detector results stored by the last `detect()`.
     */
    getDetections() {
        return this.detections;
    }
    /**
     * Runs the detectors over the collected components and stores the
     * per-detector results.
     *
     * @inheritdoc
     * @returns {object} The final detection.
     * @throws {Error} When called before `collect()`.
     */
    detect() {
        const { components } = this;
        if (components === undefined) {
            throw new Error("BotDetector.detect can't be called before BotDetector.collect");
        }
        const outcome = detect(components, detectors);
        this.detections = outcome[0];
        return outcome[1];
    }
    /**
     * Collects all components and stores them on the instance.
     *
     * @inheritdoc
     * @returns {Promise<object>} The collected components.
     */
    async collect() {
        const collected = await collect(sources);
        this.components = collected;
        return collected;
    }
}
|
||||
|
||||
/**
 * Sends an unpersonalized AJAX request to collect installation statistics.
 *
 * Nothing is sent when `window.__fpjs_d_m` is truthy, and otherwise only for
 * the fraction of calls where `Math.random()` falls below 0.001.
 */
function monitor() {
    // The FingerprintJS CDN (https://github.com/fingerprintjs/cdn) replaces `window.__fpjs_d_m` with `true`
    const shouldReport = !window.__fpjs_d_m && Math.random() < 0.001;
    if (!shouldReport) {
        return;
    }
    try {
        const xhr = new XMLHttpRequest();
        xhr.open('get', `https://m1.openfpcdn.io/botd/v${version}/npm-monitoring`, true);
        xhr.send();
    }
    catch (error) {
        // console.error is ok here because it's an unexpected error handler
        // eslint-disable-next-line no-console
        console.error(error);
    }
}
|
||||
/**
 * Creates a detector and collects its components before returning it.
 *
 * @param {object} [options]
 * @param {boolean} [options.monitoring=true] - When truthy, `monitor()` is
 *   called first.
 * @returns {Promise<BotDetector>} A detector whose `collect()` has completed.
 */
async function load({ monitoring = true } = {}) {
    if (monitoring) {
        monitor();
    }
    const instance = new BotDetector();
    await instance.collect();
    return instance;
}
|
||||
// Object exposing `load`.
const index = { load: load };
|
||||
|
||||
export { BotKind, BotdError, collect, index as default, detect, detectors, load, sources };
|
||||
27
tests/vendor/fingerprintjs-5.2.0.umd.min.js
vendored
Normal file
27
tests/vendor/fingerprintjs-5.2.0.umd.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue