mirror of
https://github.com/feder-cr/invisible_playwright.git
synced 2026-06-10 08:45:13 +02:00
Compare commits
57 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
12883bb4c7 | ||
|
|
ef86cd57dc | ||
|
|
2410582960 | ||
|
|
df4493d553 | ||
|
|
8ba88958be | ||
|
|
4564b26158 | ||
|
|
036a1a1d5f | ||
|
|
0b53e18e23 | ||
|
|
62cdf626a0 | ||
|
|
5dac302938 | ||
|
|
67b5e7cd5e | ||
|
|
5f546f4d63 | ||
|
|
2dd2224e73 | ||
|
|
610f09d2c2 | ||
|
|
90529ff181 | ||
|
|
8d7b6eafdf | ||
|
|
7260f461bb | ||
|
|
86a04d2d34 | ||
|
|
eec373a719 | ||
|
|
215b8801d7 | ||
|
|
cc7d95c8ae | ||
|
|
8bf72da40c | ||
|
|
e2bcd0cd4c | ||
|
|
6f44e1af38 | ||
|
|
262d388b99 | ||
|
|
b7eda606a2 | ||
|
|
e3b8a42ded | ||
|
|
26fa962d24 | ||
|
|
7b860b7398 | ||
|
|
f2664f96e1 | ||
|
|
369f3f7fdb | ||
|
|
d6c3de7730 | ||
|
|
143aff4bd2 | ||
|
|
ee0fe57ced | ||
|
|
929da150bc | ||
|
|
66c6b09821 | ||
|
|
f208f5262c | ||
|
|
35508595fa | ||
|
|
97a3cdfc17 | ||
|
|
033d0e9b35 | ||
|
|
def731e6ec | ||
|
|
4a71a0142a | ||
|
|
9432e789c6 | ||
|
|
75e6927904 | ||
|
|
f4d42dcac4 | ||
|
|
0375cf3f79 | ||
|
|
62b9030d2a | ||
|
|
60d13a2b6e | ||
|
|
3d8ba0b82c | ||
|
|
413db06690 | ||
|
|
70b6a54dbc | ||
|
|
bfccd61863 | ||
|
|
34aeb9601f | ||
|
|
64eef4daff | ||
|
|
cb3755cdd5 | ||
|
|
9571c3049d | ||
|
|
1701b34688 |
53 changed files with 5683 additions and 490 deletions
98
.github/ISSUE_TEMPLATE/01-launch-failure.yml
vendored
Normal file
98
.github/ISSUE_TEMPLATE/01-launch-failure.yml
vendored
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
name: Launch failure
|
||||||
|
description: Browser or wrapper fails to start (install errors, missing deps, profile load fails, never reaches new_page)
|
||||||
|
title: "[launch] "
|
||||||
|
labels: ["bug", "launch-failure"]
|
||||||
|
body:
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Use this when the browser never reaches a usable state.
|
||||||
|
If it starts and the bug appears on a site or when clicking something, use the site/action template instead.
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: version
|
||||||
|
attributes:
|
||||||
|
label: Version
|
||||||
|
description: Output of `python -m invisible_playwright version`.
|
||||||
|
placeholder: 0.1.7 (binary firefox-7)
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: os
|
||||||
|
attributes:
|
||||||
|
label: OS
|
||||||
|
options:
|
||||||
|
- Windows 10/11 x86_64
|
||||||
|
- Linux x86_64
|
||||||
|
- macOS (unsupported)
|
||||||
|
- Other
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: python
|
||||||
|
attributes:
|
||||||
|
label: Python
|
||||||
|
placeholder: 3.11.7
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: install_cmd
|
||||||
|
attributes:
|
||||||
|
label: How you installed
|
||||||
|
placeholder: pip install invisible_playwright
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: snippet
|
||||||
|
attributes:
|
||||||
|
label: What you ran
|
||||||
|
description: Stop at the line that errors out. Redact creds.
|
||||||
|
render: python
|
||||||
|
value: |
|
||||||
|
from invisible_playwright import InvisiblePlaywright
|
||||||
|
with InvisiblePlaywright(seed=42) as browser:
|
||||||
|
ctx = browser.new_context()
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: traceback
|
||||||
|
attributes:
|
||||||
|
label: Full traceback
|
||||||
|
description: The whole stack trace verbatim. Don't summarize.
|
||||||
|
render: text
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: logs
|
||||||
|
attributes:
|
||||||
|
label: Extra logs
|
||||||
|
description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1`. Optional but speeds things up.
|
||||||
|
render: text
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: tried
|
||||||
|
attributes:
|
||||||
|
label: What you already tried
|
||||||
|
description: Reinstall, clear cache, different Python version, different proxy, etc.
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: confirm
|
||||||
|
attributes:
|
||||||
|
label: Before submitting
|
||||||
|
options:
|
||||||
|
- label: Searched existing issues.
|
||||||
|
required: true
|
||||||
|
- label: On the latest released version.
|
||||||
|
required: true
|
||||||
|
- label: Removed credentials and personal paths from the snippet and logs.
|
||||||
|
required: true
|
||||||
167
.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml
vendored
Normal file
167
.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml
vendored
Normal file
|
|
@ -0,0 +1,167 @@
|
||||||
|
name: Site or action bug
|
||||||
|
description: Browser starts fine but a navigation, click, evaluate, or other operation fails or behaves wrong
|
||||||
|
title: "[bug] "
|
||||||
|
labels: ["bug"]
|
||||||
|
body:
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
For bugs that happen after the browser is up.
|
||||||
|
If the browser never launches, use the launch failure template.
|
||||||
|
If a fingerprint detector flags the browser, use the stealth detection template.
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: version
|
||||||
|
attributes:
|
||||||
|
label: Version
|
||||||
|
description: Output of `python -m invisible_playwright version`.
|
||||||
|
placeholder: 0.1.7 (binary firefox-7)
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: os
|
||||||
|
attributes:
|
||||||
|
label: OS
|
||||||
|
options:
|
||||||
|
- Windows 10/11 x86_64
|
||||||
|
- Linux x86_64
|
||||||
|
- macOS (unsupported)
|
||||||
|
- Other
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: python
|
||||||
|
attributes:
|
||||||
|
label: Python
|
||||||
|
placeholder: 3.11.7
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: headless
|
||||||
|
attributes:
|
||||||
|
label: headless=
|
||||||
|
description: Some bugs only repro on Windows headless=True (hidden alt-desktop path).
|
||||||
|
options:
|
||||||
|
- "True"
|
||||||
|
- "False"
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: proxy
|
||||||
|
attributes:
|
||||||
|
label: Proxy
|
||||||
|
description: Sites often vary by IP geo (e.g. GDPR consent shows only on UK/EU).
|
||||||
|
options:
|
||||||
|
- No proxy (host network)
|
||||||
|
- Residential, UK/GB
|
||||||
|
- Residential, US
|
||||||
|
- Residential, other country (specify in notes)
|
||||||
|
- Datacenter (specify provider in notes)
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: profile
|
||||||
|
attributes:
|
||||||
|
label: Profile dir
|
||||||
|
options:
|
||||||
|
- Fresh each run (no profile_dir)
|
||||||
|
- Persistent profile_dir, reusing across runs
|
||||||
|
- Persistent profile_dir, first run creating it
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: url
|
||||||
|
attributes:
|
||||||
|
label: URL
|
||||||
|
description: The exact URL passed to `page.goto`. Not "the homepage" — the literal string.
|
||||||
|
placeholder: https://id.sky.com/
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: snippet
|
||||||
|
attributes:
|
||||||
|
label: Runnable reproduction
|
||||||
|
description: A complete snippet we can copy, paste, run. Stub creds with placeholders, keep everything else literal.
|
||||||
|
render: python
|
||||||
|
value: |
|
||||||
|
from invisible_playwright import InvisiblePlaywright
|
||||||
|
|
||||||
|
with InvisiblePlaywright(seed=42, headless=True) as browser:
|
||||||
|
ctx = browser.new_context()
|
||||||
|
page = ctx.new_page()
|
||||||
|
page.goto("https://example.com/")
|
||||||
|
# the exact operation that fails:
|
||||||
|
page.click("button:has-text('Accept all')")
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: selector
|
||||||
|
attributes:
|
||||||
|
label: Selector or locator
|
||||||
|
description: The exact string passed to locator/click/frame_locator. Write N/A if not a selector bug.
|
||||||
|
placeholder: page.frame_locator("iframe[id^='sp_message_iframe_']").get_by_text("Accept all")
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: expected
|
||||||
|
attributes:
|
||||||
|
label: Expected
|
||||||
|
description: What should happen when the snippet runs?
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: actual
|
||||||
|
attributes:
|
||||||
|
label: Actual
|
||||||
|
description: What happens instead? Full traceback, error string verbatim, any page.on('crash') firing.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: screenshot
|
||||||
|
attributes:
|
||||||
|
label: Screenshot
|
||||||
|
description: Drag-drop a screenshot if the bug is visual. Optional but useful.
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: logs
|
||||||
|
attributes:
|
||||||
|
label: Browser logs
|
||||||
|
description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1 | tail -200`. Redact creds and real IPs.
|
||||||
|
render: text
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: notes
|
||||||
|
attributes:
|
||||||
|
label: Notes
|
||||||
|
description: Anything else, hypotheses, related issues, things you've already tried.
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: confirm
|
||||||
|
attributes:
|
||||||
|
label: Before submitting
|
||||||
|
options:
|
||||||
|
- label: Searched existing issues.
|
||||||
|
required: true
|
||||||
|
- label: On the latest released version.
|
||||||
|
required: true
|
||||||
|
- label: The snippet above runs end-to-end on a clean Python install.
|
||||||
|
required: true
|
||||||
|
- label: Removed credentials, proxy passwords, real IPs, personal file paths.
|
||||||
|
required: true
|
||||||
141
.github/ISSUE_TEMPLATE/03-stealth-detection.yml
vendored
Normal file
141
.github/ISSUE_TEMPLATE/03-stealth-detection.yml
vendored
Normal file
|
|
@ -0,0 +1,141 @@
|
||||||
|
name: Stealth detection
|
||||||
|
description: A fingerprint detector flagged the browser as a bot, VM, VPN, anti-detect, tampered, or otherwise non-human
|
||||||
|
title: "[detect] "
|
||||||
|
labels: ["bug", "stealth"]
|
||||||
|
body:
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Use this when something detects the browser (Fingerprint Pro, CreepJS, BotD, reCAPTCHA, Cloudflare, sannysoft, etc).
|
||||||
|
Bugs in operations (clicks, navigation) go to the site/action template.
|
||||||
|
Browser failing to start goes to the launch failure template.
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: version
|
||||||
|
attributes:
|
||||||
|
label: Version
|
||||||
|
placeholder: 0.1.7 (binary firefox-7)
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: os
|
||||||
|
attributes:
|
||||||
|
label: OS
|
||||||
|
options:
|
||||||
|
- Windows 10/11 x86_64
|
||||||
|
- Linux x86_64
|
||||||
|
- macOS (unsupported)
|
||||||
|
- Other
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: headless
|
||||||
|
attributes:
|
||||||
|
label: headless=
|
||||||
|
options:
|
||||||
|
- "True"
|
||||||
|
- "False"
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: proxy
|
||||||
|
attributes:
|
||||||
|
label: Proxy
|
||||||
|
description: Datacenter or wrong-country proxies trip most detectors regardless of the browser. Be honest about what you used.
|
||||||
|
options:
|
||||||
|
- No proxy (host network)
|
||||||
|
- Residential, matching target geo
|
||||||
|
- Residential, different geo than target
|
||||||
|
- Datacenter (specify provider in notes)
|
||||||
|
- Mobile / 4G
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: detector
|
||||||
|
attributes:
|
||||||
|
label: Detector name and URL
|
||||||
|
description: Exact site / service / product that flagged us.
|
||||||
|
placeholder: Fingerprint Pro — https://demo.fingerprint.com/playground
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: scores
|
||||||
|
attributes:
|
||||||
|
label: Detector verdict
|
||||||
|
description: Paste the relevant flags / scores verbatim. For Fingerprint Pro paste `bot`, `vpn`, `virtual_machine`, `tampering*`, `vm_ml_score`, `suspect_score`. For CreepJS the headless / lies / trust scores. For reCAPTCHA v3 the score number.
|
||||||
|
render: text
|
||||||
|
placeholder: |
|
||||||
|
bot: bad
|
||||||
|
vpn: true
|
||||||
|
virtual_machine: true
|
||||||
|
vm_ml_score: 0.74
|
||||||
|
suspect_score: 22
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: screenshot
|
||||||
|
attributes:
|
||||||
|
label: Screenshot of the detector result
|
||||||
|
description: Drag-drop a screenshot of the detector page so we see what you see.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: snippet
|
||||||
|
attributes:
|
||||||
|
label: How you launched
|
||||||
|
description: The InvisiblePlaywright launch + navigation that produced the result above. Redact creds.
|
||||||
|
render: python
|
||||||
|
value: |
|
||||||
|
from invisible_playwright import InvisiblePlaywright
|
||||||
|
|
||||||
|
with InvisiblePlaywright(seed=42, headless=True) as browser:
|
||||||
|
ctx = browser.new_context()
|
||||||
|
page = ctx.new_page()
|
||||||
|
page.goto("https://demo.fingerprint.com/playground")
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: expected
|
||||||
|
attributes:
|
||||||
|
label: What you expected
|
||||||
|
description: Most detectors will never give a perfect score for any browser. Tell us what threshold you'd accept (e.g. bot=not_detected, vm_ml_score < 0.3).
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: full_report
|
||||||
|
attributes:
|
||||||
|
label: Full detector response
|
||||||
|
description: For Fingerprint Pro paste the JSON from /api/event/v4/ if you have it. For CreepJS paste the full Smart Signals block. Optional but speeds things up a lot.
|
||||||
|
render: json
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: notes
|
||||||
|
attributes:
|
||||||
|
label: Notes
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: confirm
|
||||||
|
attributes:
|
||||||
|
label: Before submitting
|
||||||
|
options:
|
||||||
|
- label: Searched existing issues.
|
||||||
|
required: true
|
||||||
|
- label: On the latest released version.
|
||||||
|
required: true
|
||||||
|
- label: The detector verdict above is from a real run, not a hypothesis.
|
||||||
|
required: true
|
||||||
|
- label: Removed credentials, real IPs, FpJS visitor_id values, personal file paths from the snippet and full report.
|
||||||
|
required: true
|
||||||
79
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
79
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
|
|
@ -1,79 +0,0 @@
|
||||||
name: Bug report
|
|
||||||
description: Report a bug in the invisible_playwright Python wrapper
|
|
||||||
title: "[bug] "
|
|
||||||
labels: ["bug"]
|
|
||||||
body:
|
|
||||||
- type: markdown
|
|
||||||
attributes:
|
|
||||||
value: |
|
|
||||||
Thanks for taking the time to file a bug report.
|
|
||||||
|
|
||||||
Before continuing, please:
|
|
||||||
- Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues?q=is%3Aissue) to avoid duplicates.
|
|
||||||
- If the bug is in the **patched Firefox itself** (canvas/WebGL/audio/font spoofing, a detector flagging the browser), open it at [feder-cr/firefox-stealth](https://github.com/feder-cr/firefox-stealth/issues) instead.
|
|
||||||
- **Do not** report security vulnerabilities here — follow [SECURITY.md](https://github.com/feder-cr/invisible_playwright/blob/main/SECURITY.md).
|
|
||||||
- type: input
|
|
||||||
id: version
|
|
||||||
attributes:
|
|
||||||
label: invisible_playwright version
|
|
||||||
description: Output of `invisible_playwright version`
|
|
||||||
placeholder: "0.1.0 (binary 150.0.1)"
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: dropdown
|
|
||||||
id: os
|
|
||||||
attributes:
|
|
||||||
label: Operating system
|
|
||||||
options:
|
|
||||||
- Windows x86_64
|
|
||||||
- Linux x86_64
|
|
||||||
- Other (please specify in description)
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: input
|
|
||||||
id: python
|
|
||||||
attributes:
|
|
||||||
label: Python version
|
|
||||||
placeholder: "3.11.7"
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: repro
|
|
||||||
attributes:
|
|
||||||
label: Minimal reproduction
|
|
||||||
description: A small, self-contained code snippet that triggers the bug. Strip out anything unrelated.
|
|
||||||
render: python
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: expected
|
|
||||||
attributes:
|
|
||||||
label: Expected behavior
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: actual
|
|
||||||
attributes:
|
|
||||||
label: Actual behavior
|
|
||||||
description: Include the full error message and traceback if any.
|
|
||||||
validations:
|
|
||||||
required: true
|
|
||||||
- type: textarea
|
|
||||||
id: logs
|
|
||||||
attributes:
|
|
||||||
label: Logs / additional context
|
|
||||||
description: Browser console output, environment variables, proxy config (redact credentials), etc.
|
|
||||||
render: text
|
|
||||||
validations:
|
|
||||||
required: false
|
|
||||||
- type: checkboxes
|
|
||||||
id: confirm
|
|
||||||
attributes:
|
|
||||||
label: Confirmations
|
|
||||||
options:
|
|
||||||
- label: I have searched existing issues and this bug has not been reported.
|
|
||||||
required: true
|
|
||||||
- label: I am on the latest release.
|
|
||||||
required: true
|
|
||||||
- label: I have removed any credentials, proxy passwords, or sensitive data from logs.
|
|
||||||
required: true
|
|
||||||
8
.github/ISSUE_TEMPLATE/config.yml
vendored
8
.github/ISSUE_TEMPLATE/config.yml
vendored
|
|
@ -3,9 +3,9 @@ contact_links:
|
||||||
- name: Security vulnerability
|
- name: Security vulnerability
|
||||||
url: https://github.com/feder-cr/invisible_playwright/security/advisories/new
|
url: https://github.com/feder-cr/invisible_playwright/security/advisories/new
|
||||||
about: Report a security issue privately. Do NOT open a public issue.
|
about: Report a security issue privately. Do NOT open a public issue.
|
||||||
- name: Bug in the patched Firefox itself (canvas / WebGL / fonts / WebRTC / etc.)
|
- name: Bug in the patched Firefox source (C++, IDL, Juggler JS)
|
||||||
url: https://github.com/feder-cr/firefox-stealth/issues
|
url: https://github.com/feder-cr/invisible_firefox/issues
|
||||||
about: Spoofing/fingerprint bugs belong in the firefox-stealth repo.
|
about: Source-level patches in the Firefox fork go in the invisible_firefox repo. Detection results (FpJS, CreepJS, etc.) use the stealth detection template here.
|
||||||
- name: Question or general discussion
|
- name: Question or general discussion
|
||||||
url: https://github.com/feder-cr/invisible_playwright/discussions
|
url: https://github.com/feder-cr/invisible_playwright/discussions
|
||||||
about: For usage questions, ideas, and chat. Bugs and features still go in issues.
|
about: Usage questions, ideas, chat. Bugs and features still go in issues.
|
||||||
|
|
|
||||||
52
.github/workflows/e2e.yml
vendored
Normal file
52
.github/workflows/e2e.yml
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# e2e.yml — run the FULL browser-driving e2e suite (the 127 tests marked @pytest.mark.e2e)
|
||||||
|
# on GitHub, on every push/PR to main.
|
||||||
|
#
|
||||||
|
# Why this can run on CI when the drive-gate had to stay light: the drive-gate
|
||||||
|
# launched Firefox in true HEADLESS mode, which is content-process unstable on
|
||||||
|
# the hosted runners (eval-CSP / context-destroyed). The stealth wrapper instead
|
||||||
|
# launches Firefox HEADED on a real display; under `xvfb-run` (a virtual X
|
||||||
|
# server) that's exactly what we get on a headless CI box — stable, and the same
|
||||||
|
# thing webrtc-e2e.yml already relies on.
|
||||||
|
#
|
||||||
|
# Secret-free, so it's safe in public CI: the binary is the PUBLIC firefox-9
|
||||||
|
# release (no token), and the webrtc e2e tests fake a local TCP-only SOCKS proxy. The proxy
|
||||||
|
# realness gate (fppro / smartproxy) is NOT here — it needs secrets and stays a
|
||||||
|
# local pre-release gate.
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
name: e2e
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
e2e:
|
||||||
|
name: e2e (linux, xvfb)
|
||||||
|
runs-on: ubuntu-24.04
|
||||||
|
timeout-minutes: 40
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
with: { fetch-depth: 1 }
|
||||||
|
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||||
|
with: { python-version: '3.11' }
|
||||||
|
- name: Install wrapper + test deps (+ pinned Playwright)
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
python -m pip install ".[dev]"
|
||||||
|
python -m pip install "playwright==$(cat scripts/playwright_pin.txt)"
|
||||||
|
- name: System deps (xvfb + Firefox runtime libs)
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y xvfb
|
||||||
|
sudo "$(which python)" -m playwright install-deps firefox
|
||||||
|
- name: Fetch the published firefox binary
|
||||||
|
run: echo "FF=$(python -m invisible_playwright fetch | tail -1)" >> "$GITHUB_ENV"
|
||||||
|
- name: Run the full e2e suite under a virtual display
|
||||||
|
run: xvfb-run -a python scripts/run_e2e.py "$FF"
|
||||||
106
.github/workflows/firefox-launch-matrix.yml
vendored
Normal file
106
.github/workflows/firefox-launch-matrix.yml
vendored
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
name: firefox-launch-matrix
|
||||||
|
|
||||||
|
# Cross-Windows-edition smoke for the shipped firefox-N binary.
|
||||||
|
# Triggered by issue #22 (firefox-7 SxS mismatch on Win11 build 26200,
|
||||||
|
# reporter `jannusdorfer-create`).
|
||||||
|
#
|
||||||
|
# Runs the exact reporter snippet on every Windows runner GitHub offers,
|
||||||
|
# from a fresh checkout. If any matrix cell fails the same way, the bug
|
||||||
|
# is reproducible on at least one clean-ish environment and we ship a
|
||||||
|
# sidecar mozglue.manifest fix. If all cells pass, the bug is confined
|
||||||
|
# to the reporter's specific environment (Pro/Enterprise GPO, EDR, etc.).
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
paths:
|
||||||
|
- '.github/workflows/firefox-launch-matrix.yml'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
smoke:
|
||||||
|
name: launch (${{ matrix.os }}, py${{ matrix.python }})
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [windows-2022, windows-2025, windows-latest]
|
||||||
|
python: ["3.11", "3.12", "3.13"]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python ${{ matrix.python }}
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python }}
|
||||||
|
cache: pip
|
||||||
|
|
||||||
|
- name: Windows edition + build info
|
||||||
|
shell: pwsh
|
||||||
|
run: |
|
||||||
|
$os = Get-CimInstance Win32_OperatingSystem
|
||||||
|
Write-Host "Caption : $($os.Caption)"
|
||||||
|
Write-Host "BuildNumber: $($os.BuildNumber)"
|
||||||
|
Write-Host "OSArch : $($os.OSArchitecture)"
|
||||||
|
Write-Host "Edition : $((Get-CimInstance Win32_OperatingSystem).OperatingSystemSKU)"
|
||||||
|
Write-Host "---"
|
||||||
|
Write-Host "VC++ Redistributables installed:"
|
||||||
|
Get-ItemProperty 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\*' `
|
||||||
|
-ErrorAction SilentlyContinue |
|
||||||
|
Where-Object { $_.DisplayName -like '*Visual C++*Redist*' } |
|
||||||
|
Select-Object DisplayName, DisplayVersion |
|
||||||
|
Format-Table -AutoSize
|
||||||
|
|
||||||
|
- name: Install package from this commit
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install .
|
||||||
|
|
||||||
|
- name: Fetch firefox-7 binary
|
||||||
|
run: python -m invisible_playwright fetch
|
||||||
|
|
||||||
|
- name: Verify firefox.exe can launch standalone (the snippet that fails for issue #22)
|
||||||
|
shell: pwsh
|
||||||
|
run: |
|
||||||
|
# The platformdirs path has the duplicated `invisible-playwright` segment
|
||||||
|
# on Windows (user_cache_dir convention).
|
||||||
|
$ffPath = "$env:LOCALAPPDATA\invisible-playwright\invisible-playwright\Cache\firefox-7\firefox.exe"
|
||||||
|
if (-not (Test-Path $ffPath)) {
|
||||||
|
Write-Error "firefox.exe NOT FOUND at $ffPath"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
Write-Host "Launching: $ffPath --version"
|
||||||
|
# NOTE: firefox.exe --version on Windows prints the version but may
|
||||||
|
# return non-zero exit code (sub-process fork quirk). Check stdout.
|
||||||
|
$output = & $ffPath --version 2>&1 | Out-String
|
||||||
|
Write-Host "Output: $output"
|
||||||
|
if ($output -notmatch 'Mozilla Firefox \d') {
|
||||||
|
Write-Error "firefox.exe --version did not print a Mozilla Firefox version. Output was: $output"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
Write-Host "OK: firefox.exe runs and prints version."
|
||||||
|
|
||||||
|
- name: Run reporter's exact InvisiblePlaywright snippet
|
||||||
|
run: |
|
||||||
|
python -c "
|
||||||
|
import asyncio
|
||||||
|
from invisible_playwright.async_api import InvisiblePlaywright
|
||||||
|
async def main():
|
||||||
|
async with InvisiblePlaywright(seed=9128) as browser:
|
||||||
|
page = await browser.new_page()
|
||||||
|
await page.goto('about:blank')
|
||||||
|
print('OK: page loaded, url =', page.url)
|
||||||
|
asyncio.run(main())
|
||||||
|
"
|
||||||
|
|
||||||
|
- name: Upload diagnostics on failure
|
||||||
|
if: failure()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: launch-failure-${{ matrix.os }}-py${{ matrix.python }}
|
||||||
|
path: |
|
||||||
|
${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/firefox.exe
|
||||||
|
${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/mozglue.dll
|
||||||
|
if-no-files-found: warn
|
||||||
|
retention-days: 7
|
||||||
402
.github/workflows/release.yml
vendored
Normal file
402
.github/workflows/release.yml
vendored
Normal file
|
|
@ -0,0 +1,402 @@
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# release.yml — build all 5 patched-Firefox targets at $0 and publish them as
|
||||||
|
# DRAFT GitHub Release assets, named per the wrapper contract (constants.ARCHIVE_NAME).
|
||||||
|
# DRAFT on purpose: a human runs the realness gate and only THEN un-drafts + bumps
|
||||||
|
# BINARY_VERSION. Nothing auto-ships (issue #14 lesson).
|
||||||
|
#
|
||||||
|
# PACKAGING (issue #14: dangling symlinks broke 265 downloads — never again):
|
||||||
|
# Linux → cp -aL (dereference ALL symlinks into real files) + rm dev tools +
|
||||||
|
# strip + sanitize + tar at ROOT, then validate_release.py as a HARD
|
||||||
|
# in-pipeline gate (the exact battle-tested script from the source repo).
|
||||||
|
# Win → mach package; zip the CONTENTS of dist/firefox (clean tree, NOT
|
||||||
|
# dist/bin) so firefox.exe sits at the zip ROOT.
|
||||||
|
# macOS → mach package; ad-hoc codesign the .app; PRESERVE its internal relative
|
||||||
|
# symlinks (a .app legitimately has them — cp -aL would break it); verify
|
||||||
|
# every symlink is relative+internal; tar the bundle. --version self-gate.
|
||||||
|
#
|
||||||
|
# DRIVE GATE (the firefox-8 catcher): after build, every binary is DRIVEN by
|
||||||
|
# Playwright on its native runner (launch via juggler + real page + JS roundtrip,
|
||||||
|
# headless, no screenshot → GPU-free, zero proxy). A juggler-less binary renders
|
||||||
|
# a screenshot fine but is undrivable — only an actual drive catches that. The
|
||||||
|
# proxy realness gate (fppro/webrtc) stays LOCAL — it needs secrets.
|
||||||
|
#
|
||||||
|
# Trigger: push a tag `firefox-N`, or run manually. Hybrid runners, all free.
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
name: release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags: ['firefox-*']
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
source_ref:
|
||||||
|
description: 'invisible_firefox ref to build'
|
||||||
|
default: 'stealth/150'
|
||||||
|
release_tag:
|
||||||
|
description: 'release tag to publish the draft under (e.g. firefox-9)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
SOURCE_REPO: feder-cr/invisible_firefox
|
||||||
|
SOURCE_REF: ${{ github.event.inputs.source_ref || 'stealth/150' }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
name: build-${{ matrix.leg }}
|
||||||
|
runs-on: ${{ matrix.runner }}
|
||||||
|
timeout-minutes: 350
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- leg: linux-x86_64
|
||||||
|
runner: ubuntu-24.04
|
||||||
|
family: linux
|
||||||
|
target: ''
|
||||||
|
rust_target: x86_64-unknown-linux-gnu
|
||||||
|
win_disables: 'no'
|
||||||
|
extra_pkgs: ''
|
||||||
|
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||||
|
- leg: linux-arm64
|
||||||
|
runner: ubuntu-24.04-arm
|
||||||
|
family: linux
|
||||||
|
target: ''
|
||||||
|
rust_target: aarch64-unknown-linux-gnu
|
||||||
|
win_disables: 'no'
|
||||||
|
extra_pkgs: ''
|
||||||
|
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||||
|
- leg: win-x86_64
|
||||||
|
runner: ubuntu-24.04
|
||||||
|
family: win
|
||||||
|
target: x86_64-pc-windows-msvc
|
||||||
|
rust_target: x86_64-pc-windows-msvc
|
||||||
|
win_disables: 'yes'
|
||||||
|
extra_pkgs: 'msitools p7zip-full zip'
|
||||||
|
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||||
|
- leg: macos-arm64
|
||||||
|
runner: macos-15
|
||||||
|
family: mac
|
||||||
|
target: aarch64-apple-darwin
|
||||||
|
rust_target: aarch64-apple-darwin
|
||||||
|
win_disables: 'no'
|
||||||
|
extra_pkgs: ''
|
||||||
|
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||||
|
- leg: macos-x86_64
|
||||||
|
runner: macos-15-intel
|
||||||
|
family: mac
|
||||||
|
target: x86_64-apple-darwin
|
||||||
|
rust_target: x86_64-apple-darwin
|
||||||
|
win_disables: 'no'
|
||||||
|
extra_pkgs: ''
|
||||||
|
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||||
|
steps:
|
||||||
|
- name: Free disk + 16G swap (Linux runners)
|
||||||
|
if: matrix.family != 'mac'
|
||||||
|
run: |
|
||||||
|
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android \
|
||||||
|
/usr/local/share/boost "${AGENT_TOOLSDIRECTORY:-/opt/hostedtoolcache}" 2>/dev/null || true
|
||||||
|
sudo fallocate -l 16G /swapfile && sudo chmod 600 /swapfile && sudo mkswap /swapfile && sudo swapon /swapfile || true
|
||||||
|
|
||||||
|
- name: Checkout patched Firefox source
|
||||||
|
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
with:
|
||||||
|
repository: ${{ env.SOURCE_REPO }}
|
||||||
|
ref: ${{ env.SOURCE_REF }}
|
||||||
|
fetch-depth: 1
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||||
|
with: { python-version: '3.11' }
|
||||||
|
|
||||||
|
- name: Install Linux build tools
|
||||||
|
if: matrix.family != 'mac'
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y util-linux binutils ${{ matrix.extra_pkgs }}
|
||||||
|
|
||||||
|
- name: Select Xcode 26.2 + export SDK path (macOS)
|
||||||
|
if: matrix.family == 'mac'
|
||||||
|
run: |
|
||||||
|
sudo xcode-select -s /Applications/Xcode_26.2.app
|
||||||
|
SDKP="$(xcrun --show-sdk-path)"
|
||||||
|
echo "SDK_PATH=$SDKP" >> "$GITHUB_ENV"
|
||||||
|
echo "macOS SDK $(xcrun --sdk macosx --show-sdk-version) at $SDKP"
|
||||||
|
|
||||||
|
- name: Add Rust target
|
||||||
|
run: rustup target add ${{ matrix.rust_target }} || true
|
||||||
|
|
||||||
|
- name: Extend the repo .mozconfig (NO mold; +target/SDK as needed)
|
||||||
|
run: |
|
||||||
|
test -f .mozconfig || { echo "ERROR: no .mozconfig in source"; exit 1; }
|
||||||
|
rm -f mozconfig
|
||||||
|
{
|
||||||
|
echo ""
|
||||||
|
echo "# --- release CI levers for ${{ matrix.leg }} (mold intentionally OFF — it segfaults libxul) ---"
|
||||||
|
echo "ac_add_options --disable-debug-symbols"
|
||||||
|
} >> .mozconfig
|
||||||
|
if [ -n "${{ matrix.target }}" ]; then echo "ac_add_options --target=${{ matrix.target }}" >> .mozconfig; fi
|
||||||
|
if [ "${{ matrix.family }}" = "mac" ]; then echo "ac_add_options --with-macos-sdk=$SDK_PATH" >> .mozconfig; fi
|
||||||
|
if [ "${{ matrix.win_disables }}" = "yes" ]; then
|
||||||
|
{ echo "ac_add_options --disable-default-browser-agent";
|
||||||
|
echo "ac_add_options --disable-maintenance-service";
|
||||||
|
echo "ac_add_options --disable-update-agent"; } >> .mozconfig
|
||||||
|
fi
|
||||||
|
if [ "${{ matrix.family }}" = "mac" ]; then NCPU=$(sysctl -n hw.ncpu); else NCPU=4; fi
|
||||||
|
{ echo "mk_add_options MOZ_PARALLEL_BUILD=$NCPU";
|
||||||
|
echo "mk_add_options MOZ_OBJDIR=@TOPSRCDIR@/obj-rel"; } >> .mozconfig
|
||||||
|
echo "----- final .mozconfig -----"; cat .mozconfig
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
run: ./mach build
|
||||||
|
|
||||||
|
# ── LINUX: dereference symlinks (issue #14) + strip + sanitize + tar@root + GATE
|
||||||
|
- name: Package + validate (Linux)
|
||||||
|
if: matrix.family == 'linux'
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
DIST=obj-rel/dist/bin
|
||||||
|
STAGING=staging
|
||||||
|
rm -rf "$STAGING"; mkdir -p "$STAGING" out
|
||||||
|
cp -aL "$DIST/." "$STAGING/" # -L: dereference ALL symlinks into real files
|
||||||
|
N=$(find "$STAGING" -type l | wc -l)
|
||||||
|
[ "$N" -eq 0 ] || { echo "ERROR: $N symlinks remain after cp -aL"; exit 1; }
|
||||||
|
for t in xpcshell certutil pk12util rapl; do rm -f "$STAGING/$t"; done
|
||||||
|
# JUGGLER GATE: the binary is undrivable by Playwright without it (see 70-known-bugs)
|
||||||
|
{ [ -e "$STAGING/chrome/juggler.manifest" ] && [ -d "$STAGING/chrome/juggler" ]; } \
|
||||||
|
|| { echo "ERROR: juggler missing from package (chrome/juggler) — Playwright can't drive it"; exit 1; }
|
||||||
|
echo "juggler GATE OK (loose chrome/juggler present)"
|
||||||
|
find "$STAGING" -type f \
|
||||||
|
\( -name '*.so' -o -name firefox -o -name firefox-bin -o -name plugin-container \
|
||||||
|
-o -name pingsender -o -name glxtest -o -name vaapitest -o -name updater \) \
|
||||||
|
-exec strip --strip-debug {} + 2>/dev/null || true
|
||||||
|
STAGING="$STAGING" python3 scripts/linux_sanitize.py || true # no-op in CI (no /home/feder), defensive
|
||||||
|
tar --owner=0 --group=0 --numeric-owner --mtime="2026-01-01 00:00:00 UTC" \
|
||||||
|
-czf "out/${{ matrix.asset }}" -C "$STAGING" . # firefox at ROOT
|
||||||
|
echo "=== HARD GATE: scripts/validate_release.py (the issue-#14 protector) ==="
|
||||||
|
python3 scripts/validate_release.py --linux "out/${{ matrix.asset }}" --linux-only
|
||||||
|
ls -la out/
|
||||||
|
|
||||||
|
# ── WINDOWS (cross): zip the CLEAN dist/firefox tree, firefox.exe at root
|
||||||
|
- name: Package (Windows cross)
|
||||||
|
if: matrix.family == 'win'
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
# Do NOT swallow a mach failure: `./mach package || echo` lets set -e pass
|
||||||
|
# and would fall through to a stale tree. A release MUST come from the clean
|
||||||
|
# dist/firefox; dist/bin is the dev tree (cruft + loose juggler that masked
|
||||||
|
# the firefox-7/8 packaging bugs), never acceptable for a release.
|
||||||
|
./mach package
|
||||||
|
[ -f obj-rel/dist/firefox/firefox.exe ] \
|
||||||
|
|| { echo "ERROR: mach package did not produce a clean dist/firefox tree"; exit 1; }
|
||||||
|
WIN_APP=obj-rel/dist/firefox
|
||||||
|
echo "packaging from: $WIN_APP"
|
||||||
|
# JUGGLER GATE: omni.ja must carry juggler (else Playwright can't drive it)
|
||||||
|
[ -f "$WIN_APP/omni.ja" ] || { echo "ERROR: no omni.ja in $WIN_APP"; exit 1; }
|
||||||
|
python3 -c "import zipfile,sys; sys.exit(0 if any('juggler' in n.lower() for n in zipfile.ZipFile('$WIN_APP/omni.ja').namelist()) else 1)" \
|
||||||
|
|| { echo "ERROR: juggler missing from $WIN_APP/omni.ja — Playwright can't drive it"; exit 1; }
|
||||||
|
echo "juggler GATE OK (win)"
|
||||||
|
mkdir -p out
|
||||||
|
( cd "$WIN_APP" && zip -qr "$GITHUB_WORKSPACE/out/${{ matrix.asset }}" . ) # firefox.exe at zip ROOT
|
||||||
|
ls -la out/
|
||||||
|
|
||||||
|
# ── macOS: package .app, ad-hoc sign, verify relative-internal symlinks, --version gate, tar
|
||||||
|
- name: Package + validate (macOS)
|
||||||
|
if: matrix.family == 'mac'
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
./mach package
|
||||||
|
APP="$(find obj-rel/dist -maxdepth 2 -name '*.app' -type d | head -1)"
|
||||||
|
[ -n "$APP" ] || { echo "ERROR: no .app produced"; exit 1; }
|
||||||
|
echo "built app: $APP"
|
||||||
|
# JUGGLER GATE: the .app's omni.ja must carry juggler (else Playwright can't drive it)
|
||||||
|
python3 -c "import zipfile,sys,glob; jas=glob.glob('$APP/Contents/Resources/omni.ja')+glob.glob('$APP/Contents/Resources/browser/omni.ja'); sys.exit(0 if jas and any(any('juggler' in n.lower() for n in zipfile.ZipFile(j).namelist()) for j in jas) else 1)" \
|
||||||
|
|| { echo "ERROR: juggler missing from .app omni.ja — Playwright can't drive it"; exit 1; }
|
||||||
|
echo "juggler GATE OK (mac)"
|
||||||
|
codesign --force --deep --sign - --timestamp=none "$APP"
|
||||||
|
codesign --verify --deep --strict --verbose=2 "$APP"
|
||||||
|
echo "=== --version GATE ==="
|
||||||
|
"$APP/Contents/MacOS/firefox" --version
|
||||||
|
echo "=== critical files present ==="
|
||||||
|
for need in "Contents/MacOS/firefox" "Contents/Info.plist"; do
|
||||||
|
[ -e "$APP/$need" ] || { echo "ERROR: missing $need"; exit 1; }
|
||||||
|
done
|
||||||
|
echo "=== Info.plist well-formed + required keys (a malformed plist → Finder 'damaged') ==="
|
||||||
|
plutil -lint "$APP/Contents/Info.plist"
|
||||||
|
for key in CFBundleExecutable CFBundleIdentifier CFBundleShortVersionString; do
|
||||||
|
plutil -extract "$key" raw -o - "$APP/Contents/Info.plist" >/dev/null \
|
||||||
|
|| { echo "ERROR: Info.plist missing $key"; exit 1; }
|
||||||
|
done
|
||||||
|
EXEC="$(plutil -extract CFBundleExecutable raw -o - "$APP/Contents/Info.plist")"
|
||||||
|
[ -e "$APP/Contents/MacOS/$EXEC" ] \
|
||||||
|
|| { echo "ERROR: CFBundleExecutable '$EXEC' has no matching binary in Contents/MacOS"; exit 1; }
|
||||||
|
echo "=== verify NO absolute symlinks in the .app (relative-internal ones are fine) ==="
|
||||||
|
BAD="$(find "$APP" -type l -print0 | xargs -0 -I{} sh -c 't=$(readlink "{}"); case "$t" in /*) echo "{} -> $t";; esac')"
|
||||||
|
[ -z "$BAD" ] || { echo "ERROR: absolute symlinks in .app (break on user machines):"; echo "$BAD" | head -5; exit 1; }
|
||||||
|
echo "mac .app OK: critical files present, no absolute symlinks"
|
||||||
|
STABLE="$(dirname "$APP")/Firefox.app"
|
||||||
|
[ "$APP" = "$STABLE" ] || mv "$APP" "$STABLE"
|
||||||
|
mkdir -p out
|
||||||
|
tar -czf "out/${{ matrix.asset }}" -C "$(dirname "$STABLE")" Firefox.app # preserves internal symlinks
|
||||||
|
ls -la out/
|
||||||
|
|
||||||
|
- name: Upload build artifact
|
||||||
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||||
|
with:
|
||||||
|
name: asset-${{ matrix.leg }}
|
||||||
|
path: out/${{ matrix.asset }}
|
||||||
|
if-no-files-found: error
|
||||||
|
retention-days: 7
|
||||||
|
|
||||||
|
# DRIVE GATE — the firefox-8 catcher. A raw `firefox --screenshot` proves
|
||||||
|
# nothing about automation: a juggler-less binary renders fine and ships
|
||||||
|
# broken (firefox-8 did exactly that). So we DRIVE every binary the way users
|
||||||
|
# will: Playwright launches it over the juggler pipe, loads a real page, and
|
||||||
|
# round-trips JS. A binary with a missing/broken juggler throws TargetClosedError
|
||||||
|
# here and the release never publishes. Headless, NO screenshot → GPU-free,
|
||||||
|
# so it can't false-fail on the GPU-less hosted runners. Zero proxy / zero
|
||||||
|
# secrets → safe in public CI (the proxy realness gate stays local, by design).
|
||||||
|
# Each leg runs on its NATIVE runner so we test the real artifact, not a cross
|
||||||
|
# surrogate. Playwright is pinned to a version validated against this build's
|
||||||
|
# juggler; bump it in lockstep when the juggler is re-synced from upstream.
|
||||||
|
gate:
|
||||||
|
name: gate-${{ matrix.leg }}
|
||||||
|
needs: build
|
||||||
|
runs-on: ${{ matrix.runner }}
|
||||||
|
timeout-minutes: 25
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
# `extra: --full` adds the mouse/keyboard/canvas/navsurface interaction
|
||||||
|
# checks. Only on linux-x86_64 (historically the most reliable hosted
|
||||||
|
# runner): the interaction code is platform-identical JS (omni.ja), so
|
||||||
|
# one reliable full run catches a firefox-2-class regression for all
|
||||||
|
# platforms. The other legs run SMOKE (launch+http+UA+webdriver) — the
|
||||||
|
# firefox-8/juggler catcher — which is robust even on the flaky
|
||||||
|
# windows-latest runner. See scripts/ci_drive_gate.py.
|
||||||
|
- leg: linux-x86_64
|
||||||
|
runner: ubuntu-24.04
|
||||||
|
kind: linux
|
||||||
|
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||||
|
extra: '--full'
|
||||||
|
- leg: linux-arm64
|
||||||
|
runner: ubuntu-24.04-arm
|
||||||
|
kind: linux
|
||||||
|
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||||
|
extra: ''
|
||||||
|
- leg: win-x86_64
|
||||||
|
runner: windows-latest
|
||||||
|
kind: win
|
||||||
|
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||||
|
extra: ''
|
||||||
|
- leg: macos-arm64
|
||||||
|
runner: macos-15
|
||||||
|
kind: mac
|
||||||
|
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||||
|
extra: ''
|
||||||
|
- leg: macos-x86_64
|
||||||
|
runner: macos-15-intel
|
||||||
|
kind: mac
|
||||||
|
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||||
|
extra: ''
|
||||||
|
steps:
|
||||||
|
- name: Checkout wrapper (for scripts/ci_drive_gate.py)
|
||||||
|
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
with: { fetch-depth: 1 }
|
||||||
|
- name: Download asset
|
||||||
|
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||||
|
with:
|
||||||
|
name: asset-${{ matrix.leg }}
|
||||||
|
path: art
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||||
|
with: { python-version: '3.11' }
|
||||||
|
- name: Install Playwright driver (no bundled browser — we override executable_path)
|
||||||
|
# Pin from a SINGLE source (scripts/playwright_pin.txt) so release.yml and
|
||||||
|
# verify-assets.yml can't drift to different versions. The drive gate then
|
||||||
|
# ENFORCES playwright↔juggler compatibility: an incompatible pin fails the
|
||||||
|
# launch/drive (TargetClosedError / protocol error) and nothing publishes.
|
||||||
|
# Bump the pin file in lockstep when the juggler is re-synced from upstream.
|
||||||
|
shell: bash
|
||||||
|
run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)"
|
||||||
|
- name: Linux system deps for headless firefox
|
||||||
|
if: matrix.kind == 'linux'
|
||||||
|
run: sudo "$(which python)" -m playwright install-deps firefox
|
||||||
|
- name: Extract + locate firefox binary
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
mkdir -p ff
|
||||||
|
A="art/${{ matrix.asset }}"
|
||||||
|
case "${{ matrix.kind }}" in
|
||||||
|
win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";;
|
||||||
|
linux) tar xzf "$A" -C ff; EXE="ff/firefox";;
|
||||||
|
mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";;
|
||||||
|
esac
|
||||||
|
[ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; }
|
||||||
|
chmod +x "$EXE" 2>/dev/null || true
|
||||||
|
echo "FF_EXE=$EXE" >> "$GITHUB_ENV"
|
||||||
|
echo "located: $EXE"
|
||||||
|
- name: DRIVE GATE — Playwright launch via juggler + real page (+ interaction on --full)
|
||||||
|
shell: bash
|
||||||
|
run: python scripts/ci_drive_gate.py "$FF_EXE" ${{ matrix.extra }}
|
||||||
|
|
||||||
|
publish:
|
||||||
|
name: publish-draft-release
|
||||||
|
needs: [build, gate]
|
||||||
|
runs-on: ubuntu-24.04
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
steps:
|
||||||
|
- name: Download all build assets
|
||||||
|
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||||
|
with: { pattern: asset-*, path: dl, merge-multiple: true }
|
||||||
|
- name: Assert all 5 target archives present (no silent partial release)
|
||||||
|
run: |
|
||||||
|
cd dl
|
||||||
|
EXPECTED="
|
||||||
|
firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||||
|
firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||||
|
firefox-150.0.1-stealth-win-x86_64.zip
|
||||||
|
firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||||
|
firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||||
|
"
|
||||||
|
for a in $EXPECTED; do
|
||||||
|
[ -s "$a" ] || { echo "ERROR: missing/empty release asset: $a (a build leg silently dropped out?)"; exit 1; }
|
||||||
|
done
|
||||||
|
echo "all 5 target archives present"
|
||||||
|
- name: Generate checksums.txt
|
||||||
|
run: |
|
||||||
|
cd dl; ls -la
|
||||||
|
# explicit glob — never include checksums.txt itself (the `*`-includes-itself trap)
|
||||||
|
sha256sum firefox-150.0.1-stealth-* > checksums.txt
|
||||||
|
echo "----- checksums.txt -----"; cat checksums.txt
|
||||||
|
- name: Resolve release tag
|
||||||
|
id: tag
|
||||||
|
run: |
|
||||||
|
TAG="${{ github.event.inputs.release_tag }}"
|
||||||
|
[ -z "$TAG" ] && TAG="${GITHUB_REF_NAME}"
|
||||||
|
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "publishing DRAFT release for tag: $TAG"
|
||||||
|
- name: Create DRAFT release with all assets
|
||||||
|
uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2
|
||||||
|
with:
|
||||||
|
tag_name: ${{ steps.tag.outputs.tag }}
|
||||||
|
name: invisible_firefox (150.0.1) rev ${{ steps.tag.outputs.tag }}
|
||||||
|
draft: true
|
||||||
|
prerelease: false
|
||||||
|
fail_on_unmatched_files: true
|
||||||
|
files: |
|
||||||
|
dl/*.tar.gz
|
||||||
|
dl/*.zip
|
||||||
|
dl/checksums.txt
|
||||||
|
body: |
|
||||||
|
Patched Firefox 150.0.1 — built on GitHub Actions ($0, no mold).
|
||||||
|
Targets: linux-x86_64, linux-arm64, win-x86_64, macos-arm64, macos-x86_64.
|
||||||
|
|
||||||
|
DRAFT — do not publish until validate_release.py + realness gate pass on all archives.
|
||||||
|
|
||||||
|
macOS: ad-hoc signed (not notarized). After download run:
|
||||||
|
xattr -dr com.apple.quarantine Firefox.app
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
111
.github/workflows/verify-assets.yml
vendored
Normal file
111
.github/workflows/verify-assets.yml
vendored
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# verify-assets.yml — re-runnable DRIVE GATE for an EXISTING release's assets.
|
||||||
|
#
|
||||||
|
# release.yml drive-gates every binary it builds. This does the same drive test
|
||||||
|
# WITHOUT rebuilding: it downloads a release's already-published assets (works on
|
||||||
|
# DRAFT releases too via GITHUB_TOKEN) and drives each one on its native runner.
|
||||||
|
#
|
||||||
|
# Use it to:
|
||||||
|
# • drive-test a release that was built before the in-pipeline gate existed
|
||||||
|
# (e.g. firefox-9, built on the old release.yml), or
|
||||||
|
# • re-verify any shipped release on demand (regression check).
|
||||||
|
#
|
||||||
|
# Same single-source-of-truth drive logic as release.yml: scripts/ci_drive_gate.py.
|
||||||
|
# Headless, no screenshot → GPU-free. Zero proxy / zero secrets.
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
name: verify-assets
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
release_tag:
|
||||||
|
description: 'release tag whose assets to drive-test (e.g. firefox-9)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
# write (not read) is required: GitHub only exposes DRAFT releases to tokens
|
||||||
|
# with push access. With contents:read, `gh release download` on a draft tag
|
||||||
|
# 404s ("release not found"). This workflow only READS assets — the elevated
|
||||||
|
# scope is solely to make draft releases visible to GITHUB_TOKEN.
|
||||||
|
contents: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
drive:
|
||||||
|
name: drive-${{ matrix.leg }}
|
||||||
|
runs-on: ${{ matrix.runner }}
|
||||||
|
timeout-minutes: 25
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
# --full (interaction) only on the reliable linux-x86_64 leg; others run
|
||||||
|
# the robust SMOKE drive. Same rationale as release.yml's gate.
|
||||||
|
- leg: linux-x86_64
|
||||||
|
runner: ubuntu-24.04
|
||||||
|
kind: linux
|
||||||
|
asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz
|
||||||
|
extra: '--full'
|
||||||
|
- leg: linux-arm64
|
||||||
|
runner: ubuntu-24.04-arm
|
||||||
|
kind: linux
|
||||||
|
asset: firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||||
|
extra: ''
|
||||||
|
- leg: win-x86_64
|
||||||
|
runner: windows-latest
|
||||||
|
kind: win
|
||||||
|
asset: firefox-150.0.1-stealth-win-x86_64.zip
|
||||||
|
extra: ''
|
||||||
|
- leg: macos-arm64
|
||||||
|
runner: macos-15
|
||||||
|
kind: mac
|
||||||
|
asset: firefox-150.0.1-stealth-macos-arm64.tar.gz
|
||||||
|
extra: ''
|
||||||
|
- leg: macos-x86_64
|
||||||
|
runner: macos-15-intel
|
||||||
|
kind: mac
|
||||||
|
asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz
|
||||||
|
extra: ''
|
||||||
|
steps:
|
||||||
|
- name: Checkout wrapper (for scripts/ci_drive_gate.py)
|
||||||
|
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||||
|
with: { fetch-depth: 1 }
|
||||||
|
- name: Download the release asset (draft releases included)
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
mkdir -p art
|
||||||
|
gh release download "${{ github.event.inputs.release_tag }}" \
|
||||||
|
--repo "${{ github.repository }}" \
|
||||||
|
--pattern "${{ matrix.asset }}" \
|
||||||
|
--dir art
|
||||||
|
ls -la art/
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||||
|
with: { python-version: '3.11' }
|
||||||
|
- name: Install Playwright driver (no bundled browser — we override executable_path)
|
||||||
|
# Single-source pin (see release.yml); the drive gate enforces juggler compat.
|
||||||
|
shell: bash
|
||||||
|
run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)"
|
||||||
|
- name: Linux system deps for headless firefox
|
||||||
|
if: matrix.kind == 'linux'
|
||||||
|
run: sudo "$(which python)" -m playwright install-deps firefox
|
||||||
|
- name: Extract + locate firefox binary
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
mkdir -p ff
|
||||||
|
A="art/${{ matrix.asset }}"
|
||||||
|
case "${{ matrix.kind }}" in
|
||||||
|
win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";;
|
||||||
|
linux) tar xzf "$A" -C ff; EXE="ff/firefox";;
|
||||||
|
mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";;
|
||||||
|
esac
|
||||||
|
[ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; }
|
||||||
|
chmod +x "$EXE" 2>/dev/null || true
|
||||||
|
echo "FF_EXE=$EXE" >> "$GITHUB_ENV"
|
||||||
|
echo "located: $EXE"
|
||||||
|
- name: DRIVE GATE — Playwright launch via juggler + real page (+ interaction on --full)
|
||||||
|
shell: bash
|
||||||
|
run: python scripts/ci_drive_gate.py "$FF_EXE" ${{ matrix.extra }}
|
||||||
47
.github/workflows/webrtc-e2e.yml
vendored
Normal file
47
.github/workflows/webrtc-e2e.yml
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
name: webrtc-e2e
|
||||||
|
|
||||||
|
# Live WebRTC realness check against the shipped patched binary.
|
||||||
|
#
|
||||||
|
# Manual (workflow_dispatch) on purpose: it needs a firefox-N binary that
|
||||||
|
# carries the WebRTC fixes (synthetic srflx in genuine nICEr form + the
|
||||||
|
# default-route fallback behind a proxy). Run it after publishing such a
|
||||||
|
# binary — it is the release gate for "WebRTC looks real behind a proxy".
|
||||||
|
# Until that binary ships, test_not_blocked_behind_tcp_only_socks is EXPECTED
|
||||||
|
# to fail (the old binary is fully blocked behind a SOCKS proxy), which is the
|
||||||
|
# whole point of the gate.
|
||||||
|
#
|
||||||
|
# No smartproxy / credentials: the "behind a proxy" condition is faked by an
|
||||||
|
# in-process TCP-only SOCKS5 server (refuses UDP ASSOCIATE) and the egress IP
|
||||||
|
# is injected as an RFC 5737 TEST-NET address. Fully self-contained.
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
webrtc-e2e:
|
||||||
|
name: webrtc realness (ubuntu, py3.12)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: pip
|
||||||
|
|
||||||
|
- name: Install package + dev extras
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -e ".[dev]"
|
||||||
|
|
||||||
|
- name: Fetch the patched Firefox binary
|
||||||
|
run: python -m invisible_playwright fetch
|
||||||
|
|
||||||
|
- name: Resolve binary path
|
||||||
|
run: echo "STEALTHFOX_E2E_BINARY=$(python -m invisible_playwright path)" >> "$GITHUB_ENV"
|
||||||
|
|
||||||
|
- name: Run WebRTC realness e2e (xvfb for the headless Firefox)
|
||||||
|
run: |
|
||||||
|
sudo apt-get update && sudo apt-get install -y xvfb
|
||||||
|
xvfb-run -a pytest tests/test_webrtc_realness.py -m e2e -o addopts="" -v -rs
|
||||||
44
CHANGELOG.md
44
CHANGELOG.md
|
|
@ -6,6 +6,46 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- `timezone="auto"`: the browser timezone is auto-derived from the egress IP. By default (no explicit timezone) it ALWAYS resolves — from the proxy egress when a proxy is set, otherwise from the host's own public IP — so the zone can never disagree with the IP (the classic `timezone_mismatch` signal). An explicit `"Area/City"` is the only way to force a specific zone. On failure: with a proxy the launch raises (no silent host-TZ fallback behind a foreign proxy); without a proxy it falls back to the host TZ so a transient lookup can't break the launch.
|
||||||
|
- The egress IP is mapped to its IANA zone with an offline mmdb (`daijro/geoip-all-in-one`). It auto-updates against the upstream weekly rebuild: cached locally, re-checked after `GEOIP_REFRESH_DAYS` (7), older copies pruned, and a stale cache is reused when offline. `STEALTHFOX_GEOIP_MMDB` points at your own `.mmdb` to skip the download.
|
||||||
|
- `resolve_session_timezone(timezone, proxy)` and `ensure_geoip_mmdb()` re-exported at the package root (plus `GeoTimezoneError`) so integrations that own their launch can reproduce the resolution.
|
||||||
|
- Unit tests: `tests/test_geo.py` (37 tests) and `tests/test_geoip_update.py` (freshness / auto-update / offline fallback).
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- New runtime dependencies: `requests[socks]` (SOCKS egress lookup), `maxminddb` (mmdb reader), `tzdata` (IANA database for `zoneinfo`, which Windows lacks).
|
||||||
|
|
||||||
|
## [0.2.0] - 2026-05-28
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Public config helpers in `invisible_playwright.config`: `get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display)` returns a complete `firefox_user_prefs` dict; `get_default_args()` returns the baseline CLI args list (currently empty). Both also re-exported at the package root.
|
||||||
|
- `invisible_playwright.ensure_binary` re-exported at the package root for parity with the `cloakbrowser.download.ensure_binary` integration pattern that downstream projects (Skyvern, Crawlee, agno) already expect.
|
||||||
|
- These helpers let third-party fetchers (changedetection.io plugins, Crawlee `BrowserPool` subclasses, agno toolkits) drive `playwright.firefox.launch(executable_path=..., firefox_user_prefs=...)` themselves without depending on the `InvisiblePlaywright` context manager owning the lifecycle.
|
||||||
|
- `tests/unit/test_config_public.py`: 14 unit tests covering deterministic seed, locale / timezone / pin / extra_prefs / humanize variations, and round-trip via the public namespace.
|
||||||
|
|
||||||
|
### Unchanged
|
||||||
|
- `InvisiblePlaywright` context manager surface is identical (backwards compatible).
|
||||||
|
- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build.
|
||||||
|
|
||||||
|
## [0.1.8] - 2026-05-23
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- [#20](https://github.com/feder-cr/invisible_playwright/issues/20): cross-origin iframes were unreachable from Playwright. `element_handle.content_frame()` returned `None`, `frame.evaluate()` threw cross-origin SOP errors, and `frame_locator(...).click()` timed out even with `force=True`. Root cause: FF150 defaults `fission.webContentIsolationStrategy=1` (`IsolateEverything`), which site-isolates every cross-origin iframe into a separate `webIsolated` content process even when `fission.autostart=False`. The parent's Juggler FrameTree then has a Frame placeholder with no docShell and no URL — every protocol op that needs to enter the iframe fails. Fix: pin `fission.webContentIsolationStrategy=0` (`IsolateNothing`) in the baseline prefs. The setting can be flipped back per session via `extra_prefs={"fission.webContentIsolationStrategy": 1}`.
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- `tests/test_cross_origin_iframe.py`: 4 unit + 5 e2e regression sentinels for cross-origin iframe interaction. The e2e layer runs entirely offline against two local HTTP servers on `127.0.0.1` (two ports = two SOP origins) and covers `page.frames` URL tracking, `content_frame()`, `frame.evaluate()`, `frame_locator(...).locator(...)`, and end-to-end `dispatch_event("click")` for plain, sandboxed and titled iframes. A future FF upgrade or fingerprint A/B that flips the pref back to `1` will fail the suite before shipping.
|
||||||
|
|
||||||
|
### Unchanged
|
||||||
|
- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build was needed.
|
||||||
|
|
||||||
|
## [0.1.7] - 2026-05-21
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- [#18](https://github.com/feder-cr/invisible_playwright/issues/18): Tab crash when running with `headless=True` on Windows on pages that trigger cross-process navigation. Two separate bugs that only manifested together: (1) the Chromium content sandbox at default level 6 puts content processes on `kAlternateWinstation`, but the wrapper hides the browser window on its own alt-desktop (`CreateDesktop` for headless on Windows). Mismatched desktops → cross-process navigations couldn't reparent windows → content process exits cleanly and Playwright fires `page.on('crash')`. (2) The canvas2d `getImageData` stealth spoof wrote to a read-only mapped `DataSourceSurface`. On GPU-backed canvases that memory is write-protected → segfault during the final `getImageData` at page unload. Wrapper now sets `security.sandbox.content.level=4` in the alt-desktop workaround set, and `firefox-7` ships the source fix that moves the noise to the JS array's writable backing buffer.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- `BINARY_VERSION` bumped from `firefox-5` to `firefox-7`. `firefox-6` was rolled back when its partial fix turned out to be wrong (the iframe-burst hypothesis was a dead end; bisection in the evening found the real two-bug cause documented above).
|
||||||
|
|
||||||
## [0.1.6] - 2026-05-21
|
## [0.1.6] - 2026-05-21
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
@ -33,7 +73,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
## [0.1.3] - 2026-05-19
|
## [0.1.3] - 2026-05-19
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible-firefox#stealth/150](https://github.com/feder-cr/invisible-firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`).
|
- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible_firefox#stealth/150](https://github.com/feder-cr/invisible_firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`).
|
||||||
- The patched Firefox archive now ships the **proper C++ implementation** of `windowUtils.jugglerSendMouseEvent`, replacing the JS shim from 0.1.2.
|
- The patched Firefox archive now ships the **proper C++ implementation** of `windowUtils.jugglerSendMouseEvent`, replacing the JS shim from 0.1.2.
|
||||||
|
|
||||||
### C++ fixes landed in this release
|
### C++ fixes landed in this release
|
||||||
|
|
@ -44,7 +84,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
- **C7 (partial)**: storage stub for `nsIDocShell.languageOverride`. Workaround `InvisiblePlaywright(locale="")` recommended until full BC FIELD port lands.
|
- **C7 (partial)**: storage stub for `nsIDocShell.languageOverride`. Workaround `InvisiblePlaywright(locale="")` recommended until full BC FIELD port lands.
|
||||||
|
|
||||||
### Verified
|
### Verified
|
||||||
- Both archives built from same source: feder-cr/invisible-firefox commit `68906f1f9c55`.
|
- Both archives built from same source: feder-cr/invisible_firefox commit `68906f1f9c55`.
|
||||||
- Windows + Linux smoke suite green: launch, `ctx.new_page()`, `page.mouse.{move,down,up,click,wheel}`, `navigator.webdriver=false`, sannysoft 32/33 PASS.
|
- Windows + Linux smoke suite green: launch, `ctx.new_page()`, `page.mouse.{move,down,up,click,wheel}`, `navigator.webdriver=false`, sannysoft 32/33 PASS.
|
||||||
- SHA256 published in `checksums.txt` on the `firefox-3` release.
|
- SHA256 published in `checksums.txt` on the `firefox-3` release.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ Thanks for your interest in improving this project. Contributions are welcome vi
|
||||||
- **Bug?** Open a [bug report](https://github.com/feder-cr/invisible_playwright/issues/new?template=bug_report.yml).
|
- **Bug?** Open a [bug report](https://github.com/feder-cr/invisible_playwright/issues/new?template=bug_report.yml).
|
||||||
- **Idea?** Open a [feature request](https://github.com/feder-cr/invisible_playwright/issues/new?template=feature_request.yml).
|
- **Idea?** Open a [feature request](https://github.com/feder-cr/invisible_playwright/issues/new?template=feature_request.yml).
|
||||||
- **Security issue?** Do **not** open a public issue — see [SECURITY.md](SECURITY.md).
|
- **Security issue?** Do **not** open a public issue — see [SECURITY.md](SECURITY.md).
|
||||||
- **The C++ patches** live in the companion repo [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there.
|
- **The C++ patches** live in the companion repo [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there.
|
||||||
|
|
||||||
## Scope
|
## Scope
|
||||||
|
|
||||||
|
|
@ -18,7 +18,7 @@ This repository ships the **Python wrapper** (`invisible_playwright`) around a p
|
||||||
- Binary download/caching, CLI, proxy plumbing
|
- Binary download/caching, CLI, proxy plumbing
|
||||||
- Tests, docs, examples, packaging
|
- Tests, docs, examples, packaging
|
||||||
|
|
||||||
Out of scope (belongs in `invisible-firefox`):
|
Out of scope (belongs in `invisible_firefox`):
|
||||||
|
|
||||||
- Changes to the Firefox C++ source
|
- Changes to the Firefox C++ source
|
||||||
- New preferences exposed by the patched binary
|
- New preferences exposed by the patched binary
|
||||||
|
|
@ -65,7 +65,7 @@ Before opening, please:
|
||||||
|
|
||||||
- Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues) — the bug may already be tracked.
|
- Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues) — the bug may already be tracked.
|
||||||
- Reproduce on the **latest release** if possible.
|
- Reproduce on the **latest release** if possible.
|
||||||
- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible-firefox` instead.
|
- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible_firefox` instead.
|
||||||
|
|
||||||
Include:
|
Include:
|
||||||
|
|
||||||
|
|
|
||||||
103
README.md
103
README.md
|
|
@ -6,56 +6,26 @@
|
||||||
[](https://www.mozilla.org/firefox/)
|
[](https://www.mozilla.org/firefox/)
|
||||||
[](https://github.com/feder-cr/invisible_playwright/releases)
|
[](https://github.com/feder-cr/invisible_playwright/releases)
|
||||||
[](https://github.com/feder-cr/invisible_playwright/stargazers)
|
[](https://github.com/feder-cr/invisible_playwright/stargazers)
|
||||||
|
[](https://github.com/feder-cr/invisible_firefox/releases/tag/usage-counter)
|
||||||
|
|
||||||
[](https://it.linkedin.com/in/federico-elia-5199951b6)
|
[](https://it.linkedin.com/in/federico-elia-5199951b6)
|
||||||
|
|
||||||
A patched Firefox **100% Playwright-compatible** that passes the hardest browser-fingerprint detectors in the wild.
|
**Stealth Firefox that passes every bot detection test. Drop-in Playwright replacement, fingerprint patched at the C++ level, not a JavaScript shim.**
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
## Results
|
|
||||||
|
|
||||||
### Google reCAPTCHA v3 - **0.90 / 1.0**
|
|
||||||
|
|
||||||
Top-tier score. Google classifies the session as "very likely a human". Most anti-detect stacks plateau around 0.3-0.7.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
### Fingerprint Pro - **bot: not detected, VPN: false, tampering: false, dev tools: not detected**
|
|
||||||
|
|
||||||
FingerprintJS Pro's full Smart Signals battery flips every flag to "Not detected". Browser correctly identified as Firefox 150 on Windows 10. Confidence score 0.9.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
### CreepJS - **0 lies**, fingerprint is internally coherent
|
|
||||||
|
|
||||||
No contradictions between headless hints, spoofed values, and real rendering output. That "0 lies" is what kills most anti-detect browsers: one inconsistency (e.g. Chrome UA + Firefox WebGL) and the trust score collapses.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
### BrowserLeaks WebRTC - **no public IP leak**
|
|
||||||
|
|
||||||
WebRTC srflx address is the proxy egress IP; host candidates are private LAN. The real public IP never leaks via STUN, even on pages that configure their own ICE servers. Stock Firefox exposes an mDNS hostname (e.g. `abc-1234.local`) as a host ICE candidate, which is itself a stable per-session signal detectors fingerprint. invisible_playwright replaces host candidates with synthetic private-LAN IPs that match the spoofed network, removing the mDNS tell.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
### bot.sannysoft.com - **all checks pass**
|
|
||||||
|
|
||||||
Every row green: WebDriver not present, Chrome-only properties absent, plugin/mime/languages arrays coherent, permissions API correct, iframe/source window checks pass.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Why it's powerful
|
## Why it's powerful
|
||||||
|
|
||||||
**Most anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems:
|
|
||||||
|
**Most other anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems:
|
||||||
|
|
||||||
1. **JS patches are detectable.** Anti-bots enumerate native function `.toString()`, check descriptor configurability, compare property enumeration order, watch for prototype mutations. Every patch leaves a fingerprint of its own. CreepJS has an entire battery of "lies detectors" built around this.
|
1. **JS patches are detectable.** Anti-bots enumerate native function `.toString()`, check descriptor configurability, compare property enumeration order, watch for prototype mutations. Every patch leaves a fingerprint of its own. CreepJS has an entire battery of "lies detectors" built around this.
|
||||||
2. **Chromium itself is now suspect.** Residential-proxy bot traffic is overwhelmingly Chromium-based, so detectors weight anything Chromium-shaped as risky by default. Chromium-based forks inherit Chrome's open-source layers (BoringSSL, Blink, V8, ANGLE) cleanly, but they still cannot fully match Chrome in practice: Chrome ships closed-source components on top (Widevine, proprietary codecs, Google Update / Safe Browsing endpoints) that flip detectable JS feature flags and network signals, and forks lag Chrome's release cadence by days to weeks, leaving telltale version-specific behaviours that detectors lock onto.
|
2. **Chromium itself is now suspect.** Residential-proxy bot traffic is overwhelmingly Chromium-based, so detectors weight anything Chromium-shaped as risky by default. Chromium-based forks inherit Chrome's open-source layers (BoringSSL, Blink, V8, ANGLE) cleanly, but they still cannot fully match Chrome in practice: Chrome ships closed-source components on top (Widevine, proprietary codecs, Google Update / Safe Browsing endpoints) that flip detectable JS feature flags and network signals, and forks lag Chrome's release cadence by days to weeks, leaving telltale version-specific behaviours that detectors lock onto.
|
||||||
|
|
||||||
**invisible_playwright patches Firefox at the C++ level.** The spoofed values come back out through the normal Gecko paths - there is no JS shim, no override, no `Object.defineProperty`. **From the page's point of view, the browser is just telling the truth.** Anti-bot lie-detectors have nothing to latch onto.
|
**invisible_playwright patches Firefox at the C++ level.** The spoofed values come back out through the normal Gecko paths - there is no JS shim, no override, no `Object.defineProperty`. **From the page's point of view, the browser is just telling the truth.** Anti-bot lie-detectors have nothing to latch onto.
|
||||||
|
|
||||||
invisible_playwright spoofs **all the layers that matter, together, coherently** — Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) for the full per-layer breakdown of which C++ files are patched and why.
|
invisible_playwright spoofs **all the layers that matter, together, coherently**: Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) for the full per-layer breakdown of which C++ files are patched and why.
|
||||||
|
|
||||||
Everything is driven by preferences - no hardcoded values in the binary. You change one pref, you change the spoofed value.
|
Everything is driven by preferences - no hardcoded values in the binary. You change one pref, you change the spoofed value.
|
||||||
|
|
||||||
|
|
@ -63,23 +33,21 @@ Everything is driven by preferences - no hardcoded values in the binary. You cha
|
||||||
|
|
||||||
## How it compares
|
## How it compares
|
||||||
|
|
||||||
Commercial anti-detect browsers (Multilogin Mimic, GoLogin Orbita, AdsPower, Dolphin Anty) ship patched Chromium and apply most spoofing at the JavaScript layer. A few (Kameleo, Multilogin Stealthfox) also offer Firefox-based profiles, but the spoofing pattern is the same: runtime overrides on top of an unmodified rendering engine. That's the ceiling - and it's a low one.
|
**CloakBrowser** ships a similar pitch for Chromium, but its binary is **closed source** (the source-level patches are not published, you only get the compiled output), and it still hits the Chromium reCAPTCHA ceiling. The commercial anti-detect browsers (**Multilogin**, **GoLogin**, AdsPower, Dolphin, Kameleo) are paid SaaS that overlay JS-layer spoofing on a patched Chromium. Managed profiles are nice but raw detection bypass sits below both Camoufox and us.
|
||||||
|
|
||||||
| | invisible_playwright | Multilogin / GoLogin | AdsPower / Dolphin | Kameleo |
|
| | invisible_playwright | Camoufox | CloakBrowser | Multilogin |
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| Engine | Firefox (open source) | Chromium fork | Chromium fork | Chromium |
|
| Engine | Firefox 150 | Firefox (~1 year old base) | Chromium | Chromium fork |
|
||||||
| Patch depth | C++ source | JS overrides | JS overrides | JS overrides |
|
| Patch depth | C++ source | C++ source | C++ source | JS overrides |
|
||||||
| `.toString()` clean | ✅ Native Gecko path | ❌ Detectable shims | ❌ Detectable shims | ❌ Detectable shims |
|
| Maintenance | Active | Gap (~1 year) | Active | Active SaaS |
|
||||||
| Canvas / WebGL | ✅ C++ level | ⚠️ JS override | ⚠️ JS override | ⚠️ JS override |
|
| Open source | ✅ MIT | ✅ MPL | ❌ Closed source | ❌ Closed source |
|
||||||
| SOCKS5 auth | ✅ Patched | ⚠️ Varies | ⚠️ Varies | ❌ |
|
| `.toString()` clean | ✅ | ✅ | ✅ | ❌ Detectable shims |
|
||||||
| Self-hosted | ✅ | ❌ SaaS | ❌ SaaS | ❌ Cloud |
|
| Canvas / WebGL / Audio | ✅ C++ | ⚠️ Drift vs current FF | ✅ C++ | ⚠️ JS override |
|
||||||
| reCAPTCHA v3 score | **0.90** | ~0.3-0.6 | ~0.3-0.5 | ~0.3-0.5 |
|
| SOCKS5 auth | ✅ Patched | ❌ | ⚠️ Playwright proxy | ⚠️ Varies |
|
||||||
|
| **reCAPTCHA v3 score** | **0.90** | ~0.3-0.5 | ~0.3-0.5 | ~0.3-0.6 |
|
||||||
| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected |
|
| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected |
|
||||||
| FP Pro - tampering | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected |
|
| CreepJS lies | ✅ 0 | ❌ Multiple | ✅ 0 | ❌ Multiple |
|
||||||
| FP Pro - VPN flag | ✅ false | ❌ true | ❌ true | ❌ true |
|
| Cost | Free | Free | Free | From $99/mo |
|
||||||
| CreepJS lies | ✅ 0 | ❌ multiple | ❌ multiple | ❌ multiple |
|
|
||||||
|
|
||||||
Competitor scores reflect our own testing on Windows 10 against the same five detection suites used above; results may vary with their evolving builds.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -172,6 +140,21 @@ with InvisiblePlaywright(proxy=proxy) as browser:
|
||||||
|
|
||||||
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
|
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
|
||||||
|
|
||||||
|
### Timezone
|
||||||
|
|
||||||
|
The browser timezone follows `timezone=`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# default: timezone is auto-derived from the egress IP (proxy egress if a
|
||||||
|
# proxy is set, otherwise the host's own public IP)
|
||||||
|
with InvisiblePlaywright(proxy=proxy) as browser:
|
||||||
|
...
|
||||||
|
|
||||||
|
# explicit IANA zone always wins — the only way to force a specific zone
|
||||||
|
with InvisiblePlaywright(proxy=proxy, timezone="America/New_York") as browser:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
### Pinning specific fingerprint fields
|
### Pinning specific fingerprint fields
|
||||||
|
|
||||||
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:
|
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:
|
||||||
|
|
@ -203,24 +186,6 @@ invisible_playwright version # wrapper and binary versions
|
||||||
invisible_playwright clear-cache # remove all cached binaries
|
invisible_playwright clear-cache # remove all cached binaries
|
||||||
```
|
```
|
||||||
|
|
||||||
## Known issues
|
|
||||||
|
|
||||||
### `headless=True` on Windows can cause tab crashes on sites with heavy cross-process navigation
|
|
||||||
|
|
||||||
Reported as [#18](https://github.com/feder-cr/invisible_playwright/issues/18) (`id.sky.com` and similar). On Windows, `headless=True` runs Firefox headed on a hidden alt-desktop created via `CreateDesktop`. Some sites (id.sky.com, anything else loading Adobe AppMeasurement in a way that triggers cross-process navigation) end up firing `page.on('crash')` after about 10 seconds. The cause is a window-parenting interaction between the alt-desktop and the GPU/content processes; the workaround is one of:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Option A — keep the visible window (no alt-desktop)
|
|
||||||
with InvisiblePlaywright(seed=42, headless=False) as browser:
|
|
||||||
...
|
|
||||||
|
|
||||||
# Option B — run inside Xvfb on Linux (alt-desktop bug is Windows-only)
|
|
||||||
```
|
|
||||||
|
|
||||||
The visible window case works on every site we've tested. Linux + Xvfb is unaffected.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Related projects
|
## Related projects
|
||||||
|
|
||||||
invisible_playwright takes a different angle than the major Firefox-hardening projects but stands on their shoulders:
|
invisible_playwright takes a different angle than the major Firefox-hardening projects but stands on their shoulders:
|
||||||
|
|
@ -233,4 +198,4 @@ invisible_playwright takes a different angle than the major Firefox-hardening pr
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox).
|
MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox).
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ In scope:
|
||||||
|
|
||||||
Out of scope here (report to the relevant project):
|
Out of scope here (report to the relevant project):
|
||||||
|
|
||||||
- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox/security/advisories/new)
|
- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox/security/advisories/new)
|
||||||
- Vulnerabilities in upstream Firefox / mozilla-central — report to Mozilla per https://www.mozilla.org/security/
|
- Vulnerabilities in upstream Firefox / mozilla-central — report to Mozilla per https://www.mozilla.org/security/
|
||||||
- Vulnerabilities in third-party dependencies (`playwright`, `requests`, etc.) — report to those projects directly
|
- Vulnerabilities in third-party dependencies (`playwright`, `requests`, etc.) — report to those projects directly
|
||||||
|
|
||||||
|
|
|
||||||
BIN
docs/screenshots/hero.gif
Normal file
BIN
docs/screenshots/hero.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 479 KiB |
|
|
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "invisible-playwright"
|
name = "invisible-playwright"
|
||||||
version = "0.1.6"
|
version = "0.2.0"
|
||||||
description = "Playwright wrapper for a patched Firefox with deterministic stealth profile."
|
description = "Playwright wrapper for a patched Firefox with deterministic stealth profile."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|
@ -22,13 +22,15 @@ classifiers = [
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"playwright>=1.40",
|
"playwright>=1.40",
|
||||||
"platformdirs>=4",
|
"platformdirs>=4",
|
||||||
"requests>=2.31",
|
"requests[socks]>=2.31",
|
||||||
|
"maxminddb>=2.2",
|
||||||
|
"tzdata>=2024.1",
|
||||||
"tqdm>=4.66",
|
"tqdm>=4.66",
|
||||||
"pywin32>=306; sys_platform == 'win32'",
|
"pywin32>=306; sys_platform == 'win32'",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
dev = ["pytest>=7", "pytest-mock>=3", "responses>=0.24", "build>=1"]
|
dev = ["pytest>=7", "pytest-mock>=3", "responses>=0.24", "build>=1", "pytest-rerunfailures>=14", "playwright>=1.40"]
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
markers = [
|
markers = [
|
||||||
|
|
|
||||||
172
scripts/ci_drive_gate.py
Normal file
172
scripts/ci_drive_gate.py
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""CI drive gate — the firefox-N catcher.
|
||||||
|
|
||||||
|
A raw `firefox --screenshot` proves nothing about automation: a juggler-less
|
||||||
|
binary renders a screenshot just fine and ships broken (firefox-8 did exactly
|
||||||
|
that). This DRIVES the binary the way users will — Playwright launches it over
|
||||||
|
the juggler pipe and exercises real paths.
|
||||||
|
|
||||||
|
Two levels (see `--full`):
|
||||||
|
|
||||||
|
SMOKE (default — run on ALL 5 legs, on every binary's native runner):
|
||||||
|
launch over juggler-pipe → navigate a real http://127.0.0.1 page → assert a
|
||||||
|
response, the Firefox UA, navigator.webdriver falsy, and a DOM read. This is
|
||||||
|
the firefox-8 catcher (a juggler-less binary throws TargetClosedError on
|
||||||
|
launch) plus a base stealth + drivability check. It is intentionally LIGHT:
|
||||||
|
the free hosted runners — windows-latest especially — are content-process
|
||||||
|
unstable under a heavy headless interaction sequence (clicks/moves cascade
|
||||||
|
into "context destroyed" / selector-timeout / eval-CSP), so the gate that
|
||||||
|
must be GREEN on every leg stays minimal and reliable.
|
||||||
|
|
||||||
|
FULL (`--full` — run on the historically-reliable Linux leg):
|
||||||
|
SMOKE plus mouse + keyboard input (firefox-2 / issue #9:
|
||||||
|
jugglerSendMouseEvent/synthesizeMouseEvent), canvas determinism (stealth
|
||||||
|
seed must be per-session), and navigator-surface tells. The interaction code
|
||||||
|
is platform-identical JS (it lives in omni.ja), so exercising it on one
|
||||||
|
reliable leg catches a regression for ALL platforms; win interaction is
|
||||||
|
additionally covered by local pre-release testing.
|
||||||
|
|
||||||
|
NOT covered here: WebGL determinism (needs SWGL, false-fails headless) and the
|
||||||
|
faithful cross-origin iframe test (issue #20) — both live in the local realness
|
||||||
|
gate. All checks here are headless, no screenshot (GPU-free), loopback-only
|
||||||
|
(no external network / proxy / secrets) → safe in public CI.
|
||||||
|
|
||||||
|
Robustness: a real loopback HTTP page (NOT data: / about:blank — those get
|
||||||
|
re-normalized / carry an eval-blocking CSP), arrow-function evaluates (never
|
||||||
|
eval'd), and up to 2 retries on transient context-destroyed/detached/timeout.
|
||||||
|
A genuinely broken binary fails ALL attempts → the gate fails.
|
||||||
|
|
||||||
|
Usage: python ci_drive_gate.py <firefox-binary> [--full]
|
||||||
|
Exit 0 + "DRIVE GATE OK ..." on success; non-zero with a reason on failure.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import http.server
|
||||||
|
import socketserver
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
|
||||||
|
# The single test page served by the loopback server. It carries a heading for
# the DOM-read check, plus a button and an input; the inline script records
# button clicks and window mousemove events on `window` so the --full
# interaction checks can read them back.
HTML = (
    "<!doctype html><html><head><title>dt</title></head><body>"
    "<h1 id=x>hello-drive</h1>"
    "<button id=b>go</button>"
    "<input id=inp>"
    "<script>"
    "window.__clicked=0;window.__moves=0;"
    "document.getElementById('b').addEventListener('click',function(){window.__clicked=1;});"
    "window.addEventListener('mousemove',function(){window.__moves++;});"
    "</script>"
    "</body></html>"
).encode()

# Arrow function evaluated inside the page: draws a fixed 16x16 canvas and
# returns its data URL. The full gate evaluates it twice and compares results.
CANVAS_DRAW = (
    "() => {const c=document.createElement('canvas');c.width=c.height=16;"
    "const g=c.getContext('2d');g.fillStyle='#08f';g.fillRect(0,0,16,16);"
    "g.fillStyle='#f40';g.fillText('s',2,12);return c.toDataURL();}"
)

# Lower-case substrings of an error message that mark a failure as transient;
# main() retries an attempt whose message contains any of them.
_TRANSIENT = ("context was destroyed", "frame was detached", "target closed",
              "because of a navigation", "timeout", "blocked by csp")
|
||||||
|
|
||||||
|
|
||||||
|
class _Handler(http.server.BaseHTTPRequestHandler):
    """Request handler that serves the fixed HTML test page."""

    def do_GET(self):  # noqa: N802
        """Answer every GET, whatever the path, with 200 and the HTML page."""
        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(HTML)))
        self.end_headers()
        self.wfile.write(HTML)

    def log_message(self, *a):  # silence per-request stderr noise
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def _start_server():
    """Start the loopback HTTP server on a background daemon thread.

    Binds 127.0.0.1 on port 0, so the operating system picks a free port.

    Returns:
        A ``(server, port)`` tuple: the ``TCPServer`` instance and the port it
        is bound to.
    """
    srv = socketserver.TCPServer(("127.0.0.1", 0), _Handler)
    threading.Thread(target=srv.serve_forever, daemon=True).start()
    return srv, srv.server_address[1]
|
||||||
|
|
||||||
|
|
||||||
|
def _require(cond, message: str) -> None:
    """Raise ``AssertionError(message)`` unless ``cond`` is truthy.

    Used instead of a bare ``assert`` so the gate's checks still run when
    Python is started with ``-O`` / ``PYTHONOPTIMIZE``, which strips assert
    statements and would otherwise let a broken binary pass.
    """
    if not cond:
        raise AssertionError(message)


def _drive(exe: str, url: str, full: bool) -> str:
    """One full drive attempt. Returns the UA on success; raises on failure.

    Args:
        exe: Path of the Firefox binary to launch through Playwright.
        url: URL of the loopback test page to navigate to.
        full: When true, also run the mouse/keyboard, canvas and
            navigator-surface checks on top of the smoke checks.

    Raises:
        AssertionError: A smoke or full check did not hold.
        Exception: Whatever Playwright raises while launching or driving.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.firefox.launch(executable_path=exe, headless=True)
        try:
            page = browser.new_page()
            resp = page.goto(url, wait_until="load")
            _require(
                resp and resp.ok,
                f"navigation to {url} failed: {resp.status if resp else 'no response'}",
            )
            ua = page.evaluate("() => navigator.userAgent")
            webdriver = page.evaluate("() => navigator.webdriver")
            text = page.evaluate("() => document.getElementById('x').textContent")

            inter = {}
            if full:
                # firefox-2 / issue-#9 catcher: real mouse + keyboard over juggler.
                page.wait_for_selector("#b")
                page.mouse.move(20, 20)
                page.mouse.move(120, 90)  # synthesizeMouseEvent path
                page.click("#b")  # mousedown/up/click → listener fires
                page.click("#inp")
                page.keyboard.type("ok")
                inter["clicked"] = page.evaluate("() => window.__clicked")
                inter["moves"] = page.evaluate("() => window.__moves")
                inter["typed"] = page.evaluate("() => document.getElementById('inp').value")
                inter["canvas_a"] = page.evaluate(CANVAS_DRAW)
                inter["canvas_b"] = page.evaluate(CANVAS_DRAW)
                inter["langs"] = page.evaluate("() => navigator.languages.length")
                inter["plugins"] = page.evaluate("() => navigator.plugins instanceof PluginArray")
        finally:
            # Always close the browser, including when a check above raised.
            browser.close()

    # SMOKE asserts (always).
    _require("Firefox" in ua, f"unexpected UA (binary not driving correctly): {ua!r}")
    _require(text == "hello-drive", f"DOM/JS roundtrip failed: {text!r}")
    _require(not webdriver, f"navigator.webdriver leaked True (stealth regression): {webdriver!r}")

    if full:
        _require(inter["clicked"] == 1, "page.click() did not fire the click listener — mouse-event synthesis broken (firefox-2 class)")
        _require(inter["moves"] >= 1, "page.mouse.move() produced no mousemove — jugglerSendMouseEvent regression")
        _require(inter["typed"] == "ok", f"page.keyboard.type() failed: {inter['typed']!r}")
        _require(inter["canvas_a"] == inter["canvas_b"], "canvas non-deterministic across identical draws (stealth seed broken → bot tell)")
        _require(inter["langs"] and inter["langs"] > 0, "navigator.languages empty (headless tell)")
        _require(inter["plugins"], "navigator.plugins is not a PluginArray (headless tell)")
    return ua
|
||||||
|
|
||||||
|
|
||||||
|
def main(exe: str, full: bool) -> int:
    """Run the drive gate against ``exe`` and report the outcome.

    Starts the loopback server, then makes up to three drive attempts. An
    attempt is retried only when its error message contains one of the
    ``_TRANSIENT`` substrings; any other failure ends the gate at once.

    Args:
        exe: Path of the Firefox binary to drive.
        full: Run the full level instead of the smoke level.

    Returns:
        0 after printing "DRIVE GATE OK ..." on success, 1 after printing
        "DRIVE GATE FAILED ..." to stderr on failure.
    """
    srv, port = _start_server()
    url = f"http://127.0.0.1:{port}/"
    level = "full" if full else "smoke"
    extras = "http+click+mousemove+keyboard+canvas-determinism+navsurface" if full else "http+ua+webdriver+dom"
    last = None
    try:
        for attempt in (1, 2, 3):
            try:
                ua = _drive(exe, url, full)
                if attempt > 1:
                    print(f"(note: drive succeeded on attempt {attempt} after a transient error)")
                print(f"DRIVE GATE OK [{level}] | UA={ua} | {extras}=ok")
                return 0
            except Exception as e:  # noqa: BLE001 — gate: any failure must surface
                last = e
                msg = str(e).lower()
                if attempt < 3 and any(t in msg for t in _TRANSIENT):
                    print(f"(transient error on attempt {attempt}, retrying): {e}", file=sys.stderr)
                    continue
                break
    finally:
        # shutdown() only stops the serve_forever loop; server_close() is what
        # releases the listening socket.
        srv.shutdown()
        srv.server_close()
    print(f"DRIVE GATE FAILED [{level}]: {last}", file=sys.stderr)
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command line: exactly one positional argument (the binary path) and an
    # optional --full flag.
    args = sys.argv[1:]
    full = "--full" in args
    positional = [a for a in args if not a.startswith("--")]
    # Reject any other --option: a typo of --full would otherwise be dropped
    # silently and the gate would run at the weaker smoke level.
    unknown = [a for a in args if a.startswith("--") and a != "--full"]
    if len(positional) != 1 or unknown:
        print("usage: ci_drive_gate.py <path-to-firefox-binary> [--full]", file=sys.stderr)
        sys.exit(2)
    sys.exit(main(positional[0], full))
|
||||||
1
scripts/playwright_pin.txt
Normal file
1
scripts/playwright_pin.txt
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
1.55.0
|
||||||
67
scripts/run_e2e.py
Normal file
67
scripts/run_e2e.py
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Run the FULL e2e suite (every test that opens the browser) against a binary.
|
||||||
|
|
||||||
|
The 127 ``@pytest.mark.e2e`` tests are excluded from the default `pytest` run
|
||||||
|
(`addopts = -m 'not slow and not e2e'`) because they need a real Firefox binary
|
||||||
|
and a display, and they skip themselves when no binary is available. That makes
|
||||||
|
them easy to forget — and "we can't afford for something to not work". This is
|
||||||
|
the gate that runs them all, deliberately, against a chosen binary.
|
||||||
|
|
||||||
|
It is the MANDATORY pre-release e2e gate: run it green against the freshly-built
|
||||||
|
release binary BEFORE un-drafting a firefox-N (alongside the fppro + WebRTC
|
||||||
|
realness gates). It is NOT in the public CI drive-gate — the hosted runners are
|
||||||
|
content-process unstable under a heavy headless interaction sequence (see
|
||||||
|
70-known-bugs / 60-ci-release-pipeline); this runs locally on reliable hardware.
|
||||||
|
|
||||||
|
Flake-resilience: under full-suite load a couple of interaction tests (dblclick,
|
||||||
|
hover/mouseenter) can flake even though they pass 3/3 in isolation, so failures
|
||||||
|
are rerun up to twice on the known transient signatures. A genuinely broken
|
||||||
|
binary fails all attempts. The webrtc e2e tests fake a TCP-only SOCKS locally (no
|
||||||
|
proxy/secrets), so the whole suite is offline.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/run_e2e.py <firefox-binary>
|
||||||
|
python scripts/run_e2e.py # uses $INVPW_BINARY_PATH
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Regex alternation of the known transient failure signatures. ``main()`` hands
# it to pytest's ``--only-rerun`` option so only matching failures are retried.
_RERUN_SIGNATURES = "Timeout|context was destroyed|was detached|not visible|because of a navigation|TargetClosed"
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
binary = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("INVPW_BINARY_PATH")
|
||||||
|
if not binary:
|
||||||
|
print("usage: run_e2e.py <firefox-binary> (or set INVPW_BINARY_PATH)", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
if not Path(binary).exists():
|
||||||
|
print(f"ERROR: binary not found: {binary}", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
|
||||||
|
env = dict(os.environ)
|
||||||
|
# One setting drives the whole suite: conftest's firefox_binary fixture and
|
||||||
|
# the webrtc e2e both resolve from these.
|
||||||
|
env["INVPW_BINARY_PATH"] = binary
|
||||||
|
env["STEALTHFOX_E2E_BINARY"] = binary
|
||||||
|
|
||||||
|
repo = Path(__file__).resolve().parent.parent
|
||||||
|
cmd = [
|
||||||
|
sys.executable, "-m", "pytest",
|
||||||
|
"-m", "e2e",
|
||||||
|
"-o", "addopts=", # override the default 'not e2e' deselection
|
||||||
|
"--reruns", "2", "--reruns-delay", "1",
|
||||||
|
"--only-rerun", _RERUN_SIGNATURES,
|
||||||
|
"-p", "no:cacheprovider",
|
||||||
|
"-q", "--tb=short",
|
||||||
|
] + sys.argv[2:]
|
||||||
|
print(f"[run_e2e] binary={binary}")
|
||||||
|
print(f"[run_e2e] {' '.join(cmd)}")
|
||||||
|
return subprocess.run(cmd, cwd=repo, env=env).returncode
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Surface main()'s status as the process exit code.
    raise SystemExit(main())
|
||||||
|
|
@ -15,8 +15,30 @@ Quickstart:
|
||||||
page = browser.new_page()
|
page = browser.new_page()
|
||||||
page.click("#submit") # expanded into a Bezier trajectory
|
page.click("#submit") # expanded into a Bezier trajectory
|
||||||
"""
|
"""
|
||||||
from .launcher import InvisiblePlaywright
|
from .config import get_default_args, get_default_stealth_prefs
|
||||||
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
||||||
|
from ._geo import GeoTimezoneError, resolve_session_timezone
|
||||||
|
from .download import ensure_binary, ensure_geoip_mmdb
|
||||||
|
from .launcher import InvisiblePlaywright
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
from importlib.metadata import PackageNotFoundError, version as _pkg_version
|
||||||
__all__ = ["InvisiblePlaywright", "BINARY_VERSION", "FIREFOX_UPSTREAM_VERSION", "__version__"]
|
|
||||||
|
try:
|
||||||
|
__version__ = _pkg_version("invisible-playwright")
|
||||||
|
except PackageNotFoundError:
|
||||||
|
# Editable / source checkout without an install record: fall back to a
|
||||||
|
# marker rather than risk shipping a stale hardcoded string.
|
||||||
|
__version__ = "0.0.0+unknown"
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"InvisiblePlaywright",
|
||||||
|
"ensure_binary",
|
||||||
|
"ensure_geoip_mmdb",
|
||||||
|
"get_default_stealth_prefs",
|
||||||
|
"get_default_args",
|
||||||
|
"resolve_session_timezone",
|
||||||
|
"GeoTimezoneError",
|
||||||
|
"BINARY_VERSION",
|
||||||
|
"FIREFOX_UPSTREAM_VERSION",
|
||||||
|
"__version__",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,12 @@ _FONT_POOL = _load("font_pool.json")
|
||||||
_FONT_CORE: list = _FONT_POOL["core"]
|
_FONT_CORE: list = _FONT_POOL["core"]
|
||||||
_FONT_OPTIONAL: list = _FONT_POOL["optional"]
|
_FONT_OPTIONAL: list = _FONT_POOL["optional"]
|
||||||
_CPT_FONTS_OPT = _load("cpt_fonts_optional_given_class.json")["table"]
|
_CPT_FONTS_OPT = _load("cpt_fonts_optional_given_class.json")["table"]
|
||||||
|
# Browsing-history pool + CPT (per-class probabilities for visited sites).
|
||||||
|
# Drives _recaptcha_seed's cookie pre-seed: each persona ends up with a
|
||||||
|
# coherent list of ~15-30 visited sites whose categories correlate with
|
||||||
|
# gpu_class (workstation → dev-heavy, integrated_old → shop+news-heavy).
|
||||||
|
_BROWSING_POOL: list = _load("browsing_pool.json")["entries"]
|
||||||
|
_CPT_BROWSING = _load("cpt_browsing_given_class.json")["table"]
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
@ -282,6 +288,33 @@ def derive_font_whitelist(gpu_class: str, rng) -> str:
|
||||||
return derive_font_prefs(gpu_class, rng)["whitelist"]
|
return derive_font_prefs(gpu_class, rng)["whitelist"]
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
|
# BROWSING HISTORY (Bayesian: per-site P(visited|gpu_class))
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
|
def derive_browsing_history(gpu_class: str, rng) -> list:
    """Pick the set of sites a persona of ``gpu_class`` has visited.

    Every pool entry is sampled independently: one ``rng.random()`` draw per
    entry, in pool order, compared against that site's probability for the
    class. A class with no CPT row uses the ``mid_range`` row, and a site
    missing from the row uses probability 0.3.

    Returns:
        Copies of the selected pool entries, e.g.
        ``{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}``,
        ordered by ``name`` so the same seed always yields the same list.
    """
    probabilities = _CPT_BROWSING.get(gpu_class)
    if probabilities is None:
        probabilities = _CPT_BROWSING["mid_range"]

    # dict(site) copies each entry so callers cannot mutate the shared pool.
    sampled = [
        dict(site)
        for site in _BROWSING_POOL
        if probabilities.get(site["name"], 0.3) > rng.random()
    ]
    return sorted(sampled, key=lambda site: site["name"])
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
# PUBLIC API: Forge
|
# PUBLIC API: Forge
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
@ -350,6 +383,12 @@ class Forge:
|
||||||
bundle["gpu_class"], self._rng
|
bundle["gpu_class"], self._rng
|
||||||
).items()
|
).items()
|
||||||
},
|
},
|
||||||
|
# Bayesian browsing history (per-class P(visited|gpu_class)).
|
||||||
|
# Consumed by _recaptcha_seed.py to seed coherent cookie history
|
||||||
|
# when invisible_playwright is launched with prep_recaptcha=True.
|
||||||
|
"browsing_history": derive_browsing_history(
|
||||||
|
bundle["gpu_class"], self._rng
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
64
src/invisible_playwright/_fpforge/data/browsing_pool.json
Normal file
64
src/invisible_playwright/_fpforge/data/browsing_pool.json
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
{
|
||||||
|
"_comment": [
|
||||||
|
"Pool of everyday websites used by the browsing_history node.",
|
||||||
|
"Each entry: { name, category, cookie_profile }.",
|
||||||
|
"- name: bare domain (no scheme, no leading dot).",
|
||||||
|
"- category: dev / shop / news / reference / media / community / misc.",
|
||||||
|
"- cookie_profile: short tag pointing to a cookie-template recipe used by",
|
||||||
|
" _recaptcha_seed.py to generate concrete cookies (so heavy-analytics sites",
|
||||||
|
" get _ga+_gid+OneTrust, simple sites get just _ga, dev tools get GH-style).",
|
||||||
|
"Add new entries here + add per-class probabilities in cpt_browsing_given_class.json."
|
||||||
|
],
|
||||||
|
"entries": [
|
||||||
|
{"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"},
|
||||||
|
{"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"},
|
||||||
|
{"name": "mozilla.org", "category": "reference", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "w3schools.com", "category": "dev", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "mdn.io", "category": "dev", "cookie_profile": "minimal"},
|
||||||
|
{"name": "duckduckgo.com", "category": "reference", "cookie_profile": "minimal"},
|
||||||
|
{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"},
|
||||||
|
{"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "npmjs.com", "category": "dev", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "gitlab.com", "category": "dev", "cookie_profile": "ga_cf"},
|
||||||
|
{"name": "pypi.org", "category": "dev", "cookie_profile": "minimal"},
|
||||||
|
{"name": "docs.python.org", "category": "dev", "cookie_profile": "minimal"},
|
||||||
|
{"name": "rust-lang.org", "category": "dev", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "go.dev", "category": "dev", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "ebay.com", "category": "shop", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "etsy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "bestbuy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "target.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "nytimes.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "cnn.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "bbc.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "theguardian.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "reuters.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "apnews.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "washingtonpost.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "techcrunch.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "theverge.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "arstechnica.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "wired.com", "category": "news", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "engadget.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "9to5mac.com", "category": "news", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "medium.com", "category": "community", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "dev.to", "category": "community", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "reddit.com", "category": "community", "cookie_profile": "ga_cf"},
|
||||||
|
{"name": "news.ycombinator.com", "category": "community", "cookie_profile": "minimal"},
|
||||||
|
{"name": "quora.com", "category": "community", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "stackexchange.com", "category": "community", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "imdb.com", "category": "media", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "rottentomatoes.com", "category": "media", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "metacritic.com", "category": "media", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "allrecipes.com", "category": "misc", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "epicurious.com", "category": "misc", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "tripadvisor.com", "category": "misc", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "weather.com", "category": "reference", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "timeanddate.com", "category": "reference", "cookie_profile": "ga_consent"},
|
||||||
|
{"name": "thesaurus.com", "category": "reference", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "kayak.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "booking.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "airbnb.com", "category": "shop", "cookie_profile": "ga_consent"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,138 @@
|
||||||
|
{
|
||||||
|
"_comment": [
|
||||||
|
"Per-class probability that a persona of a given gpu_class has visited each",
|
||||||
|
"site in the pool. Used by the browsing_history node to derive a coherent",
|
||||||
|
"visited-domain list per persona.",
|
||||||
|
"",
|
||||||
|
"Probabilities are tuned so each class samples ~15-30 sites on average",
|
||||||
|
"(sum across all 50 entries falls in that range), giving an established-user",
|
||||||
|
"look. Categories are biased by class:",
|
||||||
|
" - workstation/high_end: higher P(dev) + high P(news/media)",
|
||||||
|
" - mid_range: balanced",
|
||||||
|
" - low_end/integrated_*: lower P(dev), higher P(shop/news/reference)",
|
||||||
|
"",
|
||||||
|
"Missing class falls back to mid_range via Node CPT pool fallback."
|
||||||
|
],
|
||||||
|
"table": {
|
||||||
|
"workstation": {
|
||||||
|
"youtube.com": 0.80, "wikipedia.org": 0.85, "mozilla.org": 0.70,
|
||||||
|
"w3schools.com": 0.40, "mdn.io": 0.55, "duckduckgo.com": 0.45,
|
||||||
|
"github.com": 0.95, "stackoverflow.com": 0.90, "npmjs.com": 0.65,
|
||||||
|
"gitlab.com": 0.50, "pypi.org": 0.55, "docs.python.org": 0.60,
|
||||||
|
"rust-lang.org": 0.35, "go.dev": 0.30,
|
||||||
|
"amazon.com": 0.70, "ebay.com": 0.25, "etsy.com": 0.15,
|
||||||
|
"bestbuy.com": 0.45, "target.com": 0.30,
|
||||||
|
"nytimes.com": 0.55, "cnn.com": 0.40, "bbc.com": 0.55,
|
||||||
|
"theguardian.com": 0.45, "reuters.com": 0.40, "apnews.com": 0.30,
|
||||||
|
"washingtonpost.com": 0.40,
|
||||||
|
"techcrunch.com": 0.65, "theverge.com": 0.60, "arstechnica.com": 0.65,
|
||||||
|
"wired.com": 0.50, "engadget.com": 0.35, "9to5mac.com": 0.30,
|
||||||
|
"medium.com": 0.55, "dev.to": 0.40, "reddit.com": 0.70,
|
||||||
|
"news.ycombinator.com": 0.65, "quora.com": 0.20, "stackexchange.com": 0.60,
|
||||||
|
"imdb.com": 0.45, "rottentomatoes.com": 0.25, "metacritic.com": 0.20,
|
||||||
|
"allrecipes.com": 0.20, "epicurious.com": 0.15, "tripadvisor.com": 0.30,
|
||||||
|
"weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25,
|
||||||
|
"kayak.com": 0.30, "booking.com": 0.35, "airbnb.com": 0.30
|
||||||
|
},
|
||||||
|
"high_end": {
|
||||||
|
"youtube.com": 0.85, "wikipedia.org": 0.80, "mozilla.org": 0.60,
|
||||||
|
"w3schools.com": 0.45, "mdn.io": 0.45, "duckduckgo.com": 0.40,
|
||||||
|
"github.com": 0.85, "stackoverflow.com": 0.80, "npmjs.com": 0.50,
|
||||||
|
"gitlab.com": 0.40, "pypi.org": 0.45, "docs.python.org": 0.50,
|
||||||
|
"rust-lang.org": 0.30, "go.dev": 0.25,
|
||||||
|
"amazon.com": 0.75, "ebay.com": 0.30, "etsy.com": 0.20,
|
||||||
|
"bestbuy.com": 0.50, "target.com": 0.35,
|
||||||
|
"nytimes.com": 0.50, "cnn.com": 0.50, "bbc.com": 0.50,
|
||||||
|
"theguardian.com": 0.40, "reuters.com": 0.35, "apnews.com": 0.30,
|
||||||
|
"washingtonpost.com": 0.35,
|
||||||
|
"techcrunch.com": 0.60, "theverge.com": 0.65, "arstechnica.com": 0.60,
|
||||||
|
"wired.com": 0.50, "engadget.com": 0.40, "9to5mac.com": 0.35,
|
||||||
|
"medium.com": 0.50, "dev.to": 0.35, "reddit.com": 0.75,
|
||||||
|
"news.ycombinator.com": 0.55, "quora.com": 0.25, "stackexchange.com": 0.55,
|
||||||
|
"imdb.com": 0.55, "rottentomatoes.com": 0.35, "metacritic.com": 0.30,
|
||||||
|
"allrecipes.com": 0.25, "epicurious.com": 0.20, "tripadvisor.com": 0.30,
|
||||||
|
"weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25,
|
||||||
|
"kayak.com": 0.30, "booking.com": 0.40, "airbnb.com": 0.30
|
||||||
|
},
|
||||||
|
"mid_range": {
|
||||||
|
"youtube.com": 0.85, "wikipedia.org": 0.75, "mozilla.org": 0.45,
|
||||||
|
"w3schools.com": 0.40, "mdn.io": 0.30, "duckduckgo.com": 0.35,
|
||||||
|
"github.com": 0.55, "stackoverflow.com": 0.55, "npmjs.com": 0.30,
|
||||||
|
"gitlab.com": 0.25, "pypi.org": 0.25, "docs.python.org": 0.30,
|
||||||
|
"rust-lang.org": 0.15, "go.dev": 0.15,
|
||||||
|
"amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30,
|
||||||
|
"bestbuy.com": 0.55, "target.com": 0.40,
|
||||||
|
"nytimes.com": 0.45, "cnn.com": 0.55, "bbc.com": 0.45,
|
||||||
|
"theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30,
|
||||||
|
"washingtonpost.com": 0.30,
|
||||||
|
"techcrunch.com": 0.45, "theverge.com": 0.50, "arstechnica.com": 0.40,
|
||||||
|
"wired.com": 0.45, "engadget.com": 0.35, "9to5mac.com": 0.30,
|
||||||
|
"medium.com": 0.45, "dev.to": 0.25, "reddit.com": 0.70,
|
||||||
|
"news.ycombinator.com": 0.30, "quora.com": 0.35, "stackexchange.com": 0.40,
|
||||||
|
"imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.35,
|
||||||
|
"allrecipes.com": 0.35, "epicurious.com": 0.25, "tripadvisor.com": 0.40,
|
||||||
|
"weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30,
|
||||||
|
"kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40
|
||||||
|
},
|
||||||
|
"low_end": {
|
||||||
|
"youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.35,
|
||||||
|
"w3schools.com": 0.30, "mdn.io": 0.20, "duckduckgo.com": 0.30,
|
||||||
|
"github.com": 0.30, "stackoverflow.com": 0.30, "npmjs.com": 0.15,
|
||||||
|
"gitlab.com": 0.10, "pypi.org": 0.10, "docs.python.org": 0.15,
|
||||||
|
"rust-lang.org": 0.05, "go.dev": 0.05,
|
||||||
|
"amazon.com": 0.85, "ebay.com": 0.50, "etsy.com": 0.40,
|
||||||
|
"bestbuy.com": 0.55, "target.com": 0.45,
|
||||||
|
"nytimes.com": 0.40, "cnn.com": 0.60, "bbc.com": 0.40,
|
||||||
|
"theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.30,
|
||||||
|
"washingtonpost.com": 0.25,
|
||||||
|
"techcrunch.com": 0.30, "theverge.com": 0.35, "arstechnica.com": 0.25,
|
||||||
|
"wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25,
|
||||||
|
"medium.com": 0.35, "dev.to": 0.15, "reddit.com": 0.65,
|
||||||
|
"news.ycombinator.com": 0.15, "quora.com": 0.45, "stackexchange.com": 0.25,
|
||||||
|
"imdb.com": 0.65, "rottentomatoes.com": 0.45, "metacritic.com": 0.35,
|
||||||
|
"allrecipes.com": 0.45, "epicurious.com": 0.30, "tripadvisor.com": 0.45,
|
||||||
|
"weather.com": 0.65, "timeanddate.com": 0.25, "thesaurus.com": 0.35,
|
||||||
|
"kayak.com": 0.35, "booking.com": 0.50, "airbnb.com": 0.40
|
||||||
|
},
|
||||||
|
"integrated_modern": {
|
||||||
|
"youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.40,
|
||||||
|
"w3schools.com": 0.35, "mdn.io": 0.25, "duckduckgo.com": 0.35,
|
||||||
|
"github.com": 0.40, "stackoverflow.com": 0.40, "npmjs.com": 0.20,
|
||||||
|
"gitlab.com": 0.15, "pypi.org": 0.20, "docs.python.org": 0.20,
|
||||||
|
"rust-lang.org": 0.10, "go.dev": 0.10,
|
||||||
|
"amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30,
|
||||||
|
"bestbuy.com": 0.50, "target.com": 0.40,
|
||||||
|
"nytimes.com": 0.40, "cnn.com": 0.55, "bbc.com": 0.45,
|
||||||
|
"theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30,
|
||||||
|
"washingtonpost.com": 0.30,
|
||||||
|
"techcrunch.com": 0.40, "theverge.com": 0.45, "arstechnica.com": 0.30,
|
||||||
|
"wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25,
|
||||||
|
"medium.com": 0.40, "dev.to": 0.20, "reddit.com": 0.65,
|
||||||
|
"news.ycombinator.com": 0.25, "quora.com": 0.40, "stackexchange.com": 0.35,
|
||||||
|
"imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.30,
|
||||||
|
"allrecipes.com": 0.40, "epicurious.com": 0.25, "tripadvisor.com": 0.40,
|
||||||
|
"weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30,
|
||||||
|
"kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40
|
||||||
|
},
|
||||||
|
"integrated_old": {
|
||||||
|
"youtube.com": 0.75, "wikipedia.org": 0.65, "mozilla.org": 0.30,
|
||||||
|
"w3schools.com": 0.20, "mdn.io": 0.10, "duckduckgo.com": 0.25,
|
||||||
|
"github.com": 0.15, "stackoverflow.com": 0.20, "npmjs.com": 0.05,
|
||||||
|
"gitlab.com": 0.05, "pypi.org": 0.05, "docs.python.org": 0.10,
|
||||||
|
"rust-lang.org": 0.02, "go.dev": 0.02,
|
||||||
|
"amazon.com": 0.85, "ebay.com": 0.55, "etsy.com": 0.45,
|
||||||
|
"bestbuy.com": 0.55, "target.com": 0.50,
|
||||||
|
"nytimes.com": 0.45, "cnn.com": 0.65, "bbc.com": 0.40,
|
||||||
|
"theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.35,
|
||||||
|
"washingtonpost.com": 0.30,
|
||||||
|
"techcrunch.com": 0.20, "theverge.com": 0.25, "arstechnica.com": 0.15,
|
||||||
|
"wired.com": 0.30, "engadget.com": 0.20, "9to5mac.com": 0.20,
|
||||||
|
"medium.com": 0.30, "dev.to": 0.05, "reddit.com": 0.55,
|
||||||
|
"news.ycombinator.com": 0.05, "quora.com": 0.55, "stackexchange.com": 0.15,
|
||||||
|
"imdb.com": 0.70, "rottentomatoes.com": 0.50, "metacritic.com": 0.35,
|
||||||
|
"allrecipes.com": 0.55, "epicurious.com": 0.35, "tripadvisor.com": 0.50,
|
||||||
|
"weather.com": 0.70, "timeanddate.com": 0.30, "thesaurus.com": 0.40,
|
||||||
|
"kayak.com": 0.40, "booking.com": 0.55, "airbnb.com": 0.40
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -120,6 +120,11 @@ class Profile:
|
||||||
webgl: WebGLProfile
|
webgl: WebGLProfile
|
||||||
fonts: List[str]
|
fonts: List[str]
|
||||||
dark_theme: bool
|
dark_theme: bool
|
||||||
|
# Bayesian browsing-history: list of {name, category, cookie_profile}
|
||||||
|
# dicts sampled from data/browsing_pool.json with per-class CPT. Used
|
||||||
|
# by _recaptcha_seed.py to build a coherent cookie pre-seed when the
|
||||||
|
# caller opts in via Stealthfox(prep_recaptcha=True).
|
||||||
|
browsing_history: List[Dict[str, str]] = field(default_factory=list)
|
||||||
_raw: Dict[str, Any] = field(default_factory=dict, repr=False, compare=False)
|
_raw: Dict[str, Any] = field(default_factory=dict, repr=False, compare=False)
|
||||||
|
|
||||||
def to_prefs_dict(self) -> Dict[str, Any]:
|
def to_prefs_dict(self) -> Dict[str, Any]:
|
||||||
|
|
@ -255,5 +260,6 @@ def generate_profile(seed: int, pin: Optional[Dict[str, Any]] = None) -> Profile
|
||||||
webgl=WebGLProfile(msaa_samples=int(raw["msaa_samples"])),
|
webgl=WebGLProfile(msaa_samples=int(raw["msaa_samples"])),
|
||||||
fonts=fonts,
|
fonts=fonts,
|
||||||
dark_theme=bool(raw["dark_theme"]),
|
dark_theme=bool(raw["dark_theme"]),
|
||||||
|
browsing_history=list(raw.get("browsing_history") or []),
|
||||||
_raw=raw,
|
_raw=raw,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
164
src/invisible_playwright/_geo.py
Normal file
164
src/invisible_playwright/_geo.py
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
"""Resolve the session timezone from the egress IP (``timezone="auto"``).
|
||||||
|
|
||||||
|
Approach B: discover the egress IP with one HTTP request — routed *through the
|
||||||
|
proxy* when one is set, otherwise a direct request that sees the host's own
|
||||||
|
public IP — then map IP → IANA timezone with an offline mmdb
|
||||||
|
(``daijro/geoip-all-in-one``, downloaded + cached by ``download.py``).
|
||||||
|
|
||||||
|
Precedence (see ``resolve_session_timezone``):
|
||||||
|
|
||||||
|
explicit IANA → unchanged explicit always wins
|
||||||
|
"" / "auto" → egress ALWAYS resolve. With a proxy, from the proxy
|
||||||
|
egress IP; without a proxy, from the host's
|
||||||
|
own public IP. This is the default.
|
||||||
|
|
||||||
|
On failure:
|
||||||
|
with a proxy → raise a foreign proxy paired with the host TZ is
|
||||||
|
the precise ``timezone_mismatch`` signal, so
|
||||||
|
we fail loudly rather than fall back silently.
|
||||||
|
without a proxy → "" (host) the host TZ is a safe default, so a transient
|
||||||
|
lookup failure must not break the launch.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ipaddress
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class GeoTimezoneError(RuntimeError):
    """Raised when ``timezone="auto"`` cannot resolve a valid IANA zone.

    Raised in this module when no IP echo endpoint returns a valid address,
    when the egress IP or its timezone is missing from the geoip database, or
    when the database returns a zone name that ``zoneinfo`` rejects.
    """
|
||||||
|
|
||||||
|
|
||||||
|
# Plain-text IP echo endpoints (each returns just the caller's public IP).
# ``discover_egress_ip`` tries them in this order and returns the first reply
# that parses as an IP address.
_IP_ECHO_ENDPOINTS = (
    "https://api.ipify.org",
    "https://icanhazip.com",
    "https://checkip.amazonaws.com",
)

# Proxy-server URL prefixes that denote a SOCKS proxy.
# NOTE(review): no function visible in this module reads this tuple; confirm
# whether another module imports it before removing it.
_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://")
|
||||||
|
|
||||||
|
|
||||||
|
def _proxy_is_set(proxy: Optional[Dict[str, str]]) -> bool:
|
||||||
|
if not proxy:
|
||||||
|
return False
|
||||||
|
server = (proxy.get("server") or "").strip()
|
||||||
|
return bool(server) and server.lower() != "direct://"
|
||||||
|
|
||||||
|
|
||||||
|
def _proxies_for_requests(proxy: Dict[str, str]) -> Dict[str, str]:
|
||||||
|
"""Translate our proxy dict into a ``requests`` proxies mapping.
|
||||||
|
|
||||||
|
SOCKS5 uses the ``socks5h`` scheme so DNS is resolved proxy-side (matches
|
||||||
|
``network.proxy.socks_remote_dns=True`` in the Firefox path). HTTP/HTTPS
|
||||||
|
pass through unchanged. Credentials are URL-encoded.
|
||||||
|
"""
|
||||||
|
server = (proxy.get("server") or "").strip()
|
||||||
|
low = server.lower()
|
||||||
|
if low.startswith("socks5://") or low.startswith("socks://"):
|
||||||
|
scheme = "socks5h"
|
||||||
|
elif low.startswith("socks4://"):
|
||||||
|
scheme = "socks4"
|
||||||
|
elif low.startswith("https://"):
|
||||||
|
scheme = "https"
|
||||||
|
else:
|
||||||
|
scheme = "http"
|
||||||
|
|
||||||
|
host_port = server.split("://", 1)[1] if "://" in server else server
|
||||||
|
user = proxy.get("username") or ""
|
||||||
|
pwd = proxy.get("password") or ""
|
||||||
|
if user:
|
||||||
|
auth = f"{quote(user, safe='')}:{quote(pwd, safe='')}@"
|
||||||
|
else:
|
||||||
|
auth = ""
|
||||||
|
url = f"{scheme}://{auth}{host_port}"
|
||||||
|
return {"http": url, "https": url}
|
||||||
|
|
||||||
|
|
||||||
|
def discover_egress_ip(
    proxy: Optional[Dict[str, str]] = None, *, timeout: float = 10.0
) -> str:
    """Return the public egress IP.

    Routes the request through ``proxy`` when given (SOCKS support requires
    ``requests[socks]`` / PySocks); with ``proxy=None`` it makes a direct
    request that sees the host's own public IP. Tries each echo endpoint in
    turn and returns the first reply that parses as an IP address.

    Args:
        proxy: our proxy dict (``server`` plus optional credentials), or
            ``None`` / empty for a direct request.
        timeout: per-request timeout in seconds, passed to ``requests.get``.

    Raises:
        GeoTimezoneError: if no endpoint returns a valid IP. The last
            underlying error is attached as ``__cause__``.
    """
    proxies = _proxies_for_requests(proxy) if proxy else None
    last_err: Optional[Exception] = None
    for url in _IP_ECHO_ENDPOINTS:
        try:
            resp = requests.get(url, proxies=proxies, timeout=timeout)
            resp.raise_for_status()
            ip = resp.text.strip()
            ipaddress.ip_address(ip)  # validate (raises ValueError if not an IP)
            return ip
        except Exception as exc:  # noqa: BLE001 - try the next endpoint
            last_err = exc

    if proxies:
        message = (
            f"could not discover the proxy egress IP via {len(_IP_ECHO_ENDPOINTS)} "
            f"endpoints (last error: {last_err!r}). For SOCKS proxies make sure "
            f"requests[socks] / PySocks is installed."
        )
    else:
        # Direct lookup: do not blame a proxy or PySocks that is not involved.
        message = (
            f"could not discover the host egress IP via {len(_IP_ECHO_ENDPOINTS)} "
            f"endpoints (last error: {last_err!r})."
        )
    # Chain the last failure so its traceback is kept, not just its repr.
    raise GeoTimezoneError(message) from last_err
|
||||||
|
|
||||||
|
|
||||||
|
def ip_to_timezone(ip: str, mmdb_path: Any) -> str:
    """Look up the IANA timezone recorded for ``ip`` in the offline mmdb.

    The zone is read from the record's ``location.time_zone`` field and then
    checked by constructing a ``zoneinfo.ZoneInfo`` from it.

    Raises:
        GeoTimezoneError: if the database has no record for ``ip``, the record
            carries no timezone, or ``ZoneInfo`` rejects the zone name.
    """
    import maxminddb

    with maxminddb.open_database(str(mmdb_path)) as db:
        entry = db.get(ip)
    if not entry:
        raise GeoTimezoneError(f"egress IP {ip} not present in the geoip database")

    # A non-dict record is treated as having no location data.
    location = (entry.get("location") or {}) if isinstance(entry, dict) else {}
    zone = location.get("time_zone")
    if not zone:
        raise GeoTimezoneError(f"no timezone for egress IP {ip} in the geoip database")

    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    try:
        ZoneInfo(zone)
    except (ZoneInfoNotFoundError, ValueError) as exc:
        raise GeoTimezoneError(
            f"geoip returned an invalid IANA zone {zone!r} for {ip}: {exc}"
        ) from exc
    return zone
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_session_timezone(
    timezone: str, proxy: Optional[Dict[str, str]]
) -> str:
    """Turn the user's ``timezone`` setting into a concrete IANA zone (or ``""``).

    An explicit value (anything other than ``""`` / ``"auto"``) is returned
    stripped and otherwise untouched. ``""`` and ``"auto"`` resolve from the
    egress IP: through the proxy when one is set, otherwise from the host's own
    public IP.

    If that lookup fails behind a proxy, the original exception is re-raised so
    a foreign proxy is never silently paired with the host timezone. Without a
    proxy the function returns ``""`` (host timezone), so a transient lookup
    failure cannot break the launch.
    """
    requested = (timezone or "").strip()
    wants_auto = requested == "" or requested.lower() == "auto"
    if not wants_auto:
        return requested  # explicit IANA wins

    from .download import ensure_geoip_mmdb

    behind_proxy = _proxy_is_set(proxy)
    egress_proxy = proxy if behind_proxy else None
    try:
        return ip_to_timezone(discover_egress_ip(egress_proxy), ensure_geoip_mmdb())
    except Exception:
        if not behind_proxy:
            return ""  # no proxy: host TZ is a safe fallback
        raise  # fail-early behind a proxy (timezone_mismatch trap)
|
||||||
340
src/invisible_playwright/_recaptcha_seed.py
Normal file
340
src/invisible_playwright/_recaptcha_seed.py
Normal file
|
|
@ -0,0 +1,340 @@
|
||||||
|
"""Deterministic reCAPTCHA cookie pre-seed.
|
||||||
|
|
||||||
|
Consumes the Bayesian-sampled `browsing_history` from the persona Profile
|
||||||
|
(see `_fpforge/_sampler.py:derive_browsing_history`). For each visited
|
||||||
|
site, builds 1-5 realistic cookies whose composition is chosen by the
|
||||||
|
site's `cookie_profile` tag (analytics-only / consent / cloudflare-bot-
|
||||||
|
management / etc.). All values seeded deterministically from the persona
|
||||||
|
seed, so a given persona always presents the SAME cookies across sessions.
|
||||||
|
|
||||||
|
In addition, always seeds 5 cookies on .google.com (NID, CONSENT, SOCS,
|
||||||
|
_GRECAPTCHA, ENID). Excludes 1P_JAR which was deprecated by Google in 2022
|
||||||
|
— including it now is an anachronism flag.
|
||||||
|
|
||||||
|
Public API:
|
||||||
|
await seed_recaptcha_cookies_async(context, profile, timezone=None)
|
||||||
|
seed_recaptcha_cookies_sync(context, profile, timezone=None)
|
||||||
|
|
||||||
|
`profile` is an `_fpforge.Profile`; `timezone` is the IANA tz (e.g.
|
||||||
|
"Europe/Rome") used to derive the CONSENT cookie's language token, so a
|
||||||
|
European-tz persona gets CONSENT in their language not en+FX.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
|
# URL-safe base64 alphabet (no padding chars). Sampled by ``_b64_rand``.
_B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
# Lowercase hexadecimal digits. Sampled by ``_hex_rand``.
_HEX_ALPHABET = "0123456789abcdef"
|
||||||
|
|
||||||
|
|
||||||
|
def _sub_seed(seed: int, tag: str) -> int:
|
||||||
|
"""FNV-1a mix → independent PRNG streams per logical bucket from one seed."""
|
||||||
|
h = 0xcbf29ce484222325 ^ (seed & 0xFFFFFFFF)
|
||||||
|
for c in tag.encode("ascii"):
|
||||||
|
h ^= c
|
||||||
|
h = (h * 0x100000001b3) & 0xFFFFFFFFFFFFFFFF
|
||||||
|
return h or 0xdeadbeef
|
||||||
|
|
||||||
|
|
||||||
|
def _b64_rand(rng: random.Random, length: int) -> str:
    """Return ``length`` characters drawn one at a time from ``_B64_ALPHABET``.

    One ``rng.choice`` call is made per output character, in order.
    """
    picked = []
    for _ in range(length):
        picked.append(rng.choice(_B64_ALPHABET))
    return "".join(picked)
|
||||||
|
|
||||||
|
|
||||||
|
def _hex_rand(rng: random.Random, length: int) -> str:
    """Return ``length`` lowercase hex digits drawn one at a time from ``_HEX_ALPHABET``.

    One ``rng.choice`` call is made per output character, in order.
    """
    digits = []
    for _ in range(length):
        digits.append(rng.choice(_HEX_ALPHABET))
    return "".join(digits)
|
||||||
|
|
||||||
|
|
||||||
|
def _yyyymmdd_utc(ts: int) -> str:
|
||||||
|
return datetime.datetime.utcfromtimestamp(ts).strftime("%Y%m%d")
|
||||||
|
|
||||||
|
|
||||||
|
# IANA timezone -> (country_code, lang) for CONSENT cookie coherence.
|
||||||
|
# Real EU users get CONSENT with `<lang>+<COUNTRY>+NNN`; non-EU gets `en+FX+NNN`.
|
||||||
|
# Default fallback `en+FX+NNN` for any tz not in this map.
|
||||||
|
_TZ_TO_REGION = {
|
||||||
|
"Europe/Rome": ("IT", "it"),
|
||||||
|
"Europe/Berlin": ("DE", "de"),
|
||||||
|
"Europe/Paris": ("FR", "fr"),
|
||||||
|
"Europe/Madrid": ("ES", "es"),
|
||||||
|
"Europe/London": ("GB", "en"),
|
||||||
|
"Europe/Amsterdam": ("NL", "nl"),
|
||||||
|
"Europe/Brussels": ("BE", "fr"),
|
||||||
|
"Europe/Vienna": ("AT", "de"),
|
||||||
|
"Europe/Zurich": ("CH", "de"),
|
||||||
|
"Europe/Dublin": ("IE", "en"),
|
||||||
|
"Europe/Lisbon": ("PT", "pt"),
|
||||||
|
"Europe/Stockholm": ("SE", "sv"),
|
||||||
|
"Europe/Oslo": ("NO", "no"),
|
||||||
|
"Europe/Copenhagen": ("DK", "da"),
|
||||||
|
"Europe/Helsinki": ("FI", "fi"),
|
||||||
|
"Europe/Warsaw": ("PL", "pl"),
|
||||||
|
"Europe/Prague": ("CZ", "cs"),
|
||||||
|
"Europe/Athens": ("GR", "el"),
|
||||||
|
"Asia/Tokyo": ("FX", "ja"),
|
||||||
|
"Asia/Shanghai": ("FX", "zh"),
|
||||||
|
"Asia/Hong_Kong": ("FX", "zh"),
|
||||||
|
"Asia/Seoul": ("FX", "ko"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _consent_region_lang(timezone: Optional[str]) -> tuple:
    """Resolve an IANA timezone to the ``(region_token, lang)`` pair for CONSENT.

    A missing/empty timezone, or one not listed in ``_TZ_TO_REGION``, yields
    the default ``("FX", "en")``.
    """
    fallback = ("FX", "en")
    if not timezone:
        return fallback
    return _TZ_TO_REGION.get(timezone, fallback)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# .google.com cookie batch (always present, regardless of browsing history)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _google_cookies(rng: random.Random, now: int,
                    timezone: Optional[str] = None) -> List[dict]:
    """Build the five ``.google.com`` cookies: NID, CONSENT, SOCS, _GRECAPTCHA, ENID.

    Args:
        rng: PRNG that supplies every random field; draws happen in a fixed
            order (consent age, NID, CONSENT counters, SOCS, _GRECAPTCHA, ENID).
        now: unix-seconds reference time for dates and expiries.
        timezone: IANA zone used to pick the CONSENT ``lang+region`` token.

    Returns:
        List of five cookie dicts, in the order listed above.
    """
    day = 86400
    consent_age = rng.randint(60, 720) * day
    region, lang = _consent_region_lang(timezone)

    # NID 3-digit prefix range is 100-540 to cover historical NID versions
    # (137, 105, 511, 525 etc. observed in real captures).
    nid_value = f"{rng.randint(100, 540)}={_b64_rand(rng, 178)}"
    consent_value = (
        f"YES+cb.{_yyyymmdd_utc(now - consent_age)}-"
        f"{rng.randint(10, 19):02d}-p{rng.randint(0, 9)}."
        f"{lang}+{region}+{rng.randint(100, 999)}"
    )
    # 1P_JAR is intentionally absent: Google deprecated it in 2022, so
    # including it would be an anachronism flag for fingerprinters.
    socs_value = f"CAES{_b64_rand(rng, 56)}"
    grecaptcha_value = _b64_rand(rng, 124)
    enid_value = _b64_rand(rng, 252)

    expires_180d = now + 180 * day
    expires_395d = now + 395 * day

    nid = {"name": "NID", "value": nid_value,
           "domain": ".google.com", "path": "/",
           "expires": expires_180d,
           "httpOnly": True, "secure": True, "sameSite": "None"}
    consent = {"name": "CONSENT", "value": consent_value,
               "domain": ".google.com", "path": "/",
               "expires": expires_395d,
               "secure": True, "sameSite": "Lax"}
    socs = {"name": "SOCS", "value": socs_value,
            "domain": ".google.com", "path": "/",
            "expires": expires_395d,
            "secure": True, "sameSite": "Lax"}
    grecaptcha = {"name": "_GRECAPTCHA", "value": grecaptcha_value,
                  "domain": ".google.com", "path": "/",
                  "expires": expires_180d,
                  "secure": True, "sameSite": "None"}
    enid = {"name": "ENID", "value": enid_value,
            "domain": ".google.com", "path": "/",
            "expires": expires_395d,
            "httpOnly": True, "secure": True, "sameSite": "Lax"}
    return [nid, consent, socs, grecaptcha, enid]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Per-site cookie generators (recipes keyed by site["cookie_profile"])
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _norm_domain(domain: str) -> str:
|
||||||
|
return domain if domain.startswith(".") else "." + domain
|
||||||
|
|
||||||
|
|
||||||
|
def _ga_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||||
|
first_age = rng.randint(7, 395) * 86400
|
||||||
|
return {"name": "_ga",
|
||||||
|
"value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - first_age}",
|
||||||
|
"domain": domain, "path": "/",
|
||||||
|
"expires": now + 395 * 86400,
|
||||||
|
"secure": True, "sameSite": "Lax"}
|
||||||
|
|
||||||
|
|
||||||
|
def _gid_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||||
|
return {"name": "_gid",
|
||||||
|
"value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - rng.randint(60, 86400)}",
|
||||||
|
"domain": domain, "path": "/",
|
||||||
|
"expires": now + 86400,
|
||||||
|
"secure": True, "sameSite": "Lax"}
|
||||||
|
|
||||||
|
|
||||||
|
def _cf_bm_cookie(rng: random.Random, now: int, domain: str) -> dict:
    """Build a Cloudflare bot-management ``__cf_bm`` cookie.

    Value shape: ``<43 url-safe base64 chars>.<unix time>-1-1-1-1``; the
    cookie expires 30 minutes after ``now``. The token is drawn before the
    timestamp.

    Args:
        rng: PRNG supplying the token and the embedded timestamp.
        now: unix-seconds reference time.
        domain: cookie domain, used as given.
    """
    token = _b64_rand(rng, 43)
    # The embedded timestamp is drawn from [1700000000, now]. Clamp the lower
    # bound so a pinned `now` earlier than 1700000000 (e.g. in tests) does
    # not make randint() raise ValueError on an empty range; for any later
    # `now` the draw is unchanged.
    earliest = min(1700000000, now)
    issued_at = rng.randint(earliest, now)
    return {"name": "__cf_bm",
            "value": f"{token}.{issued_at}-1-1-1-1",
            "domain": domain, "path": "/",
            "expires": now + 1800,
            "secure": True, "sameSite": "None"}
|
||||||
|
|
||||||
|
|
||||||
|
def _onetrust_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||||
|
age_d = rng.randint(7, 365)
|
||||||
|
iso = datetime.datetime.utcfromtimestamp(now - age_d * 86400).strftime(
|
||||||
|
"%Y-%m-%dT%H:%M:%S.000Z"
|
||||||
|
)
|
||||||
|
return {"name": "OptanonAlertBoxClosed",
|
||||||
|
"value": iso,
|
||||||
|
"domain": domain, "path": "/",
|
||||||
|
"expires": now + 395 * 86400,
|
||||||
|
"secure": True, "sameSite": "Lax"}
|
||||||
|
|
||||||
|
|
||||||
|
def _cookieyes_cookie(rng: random.Random, now: int, domain: str) -> dict:
    """Build a CookieYes ``cookieyes-consent`` cookie with every category accepted.

    The value carries a 28-character random consent id followed by fixed
    ``key:yes`` flags; the cookie expires 395 days after ``now``.
    """
    consent_id = _b64_rand(rng, 28)
    flags = ",consent:yes,action:yes,necessary:yes,functional:yes,analytics:yes"
    cookie = {
        "name": "cookieyes-consent",
        "value": f"consentid:{consent_id}{flags}",
        "domain": domain,
        "path": "/",
        "expires": now + 395 * 86400,
        "secure": True,
        "sameSite": "Lax",
    }
    return cookie
|
||||||
|
|
||||||
|
|
||||||
|
def _clarity_cookie(rng: random.Random, now: int, domain: str) -> dict:
    """Build a Microsoft Clarity ``_clck`` cookie.

    Value shape: ``<8 hex>|2|f<NN>|0|<unix time 60-180 days before now>``.
    Draw order is hex id, two-digit number, age; the cookie expires 365 days
    after ``now``.
    """
    user_hex = _hex_rand(rng, 8)
    marker = rng.randint(10, 99)
    stamped_at = now - rng.randint(60, 180) * 86400
    cookie = {
        "name": "_clck",
        "value": f"{user_hex}|2|f{marker}|0|{stamped_at}",
        "domain": domain,
        "path": "/",
        "expires": now + 365 * 86400,
        "secure": True,
        "sameSite": "Lax",
    }
    return cookie
|
||||||
|
|
||||||
|
|
||||||
|
def _fbp_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||||
|
"""Facebook Pixel _fbp = fb.<subdomain_index>.<unix_ms>.<random_int>"""
|
||||||
|
return {"name": "_fbp",
|
||||||
|
"value": f"fb.1.{(now - rng.randint(60, 30*86400)) * 1000}."
|
||||||
|
f"{rng.randint(100000000, 9999999999)}",
|
||||||
|
"domain": domain, "path": "/",
|
||||||
|
"expires": now + 90 * 86400,
|
||||||
|
"secure": True, "sameSite": "Lax"}
|
||||||
|
|
||||||
|
|
||||||
|
def _gtm_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||||
|
"""_dc_gtm_<container_id>=1 — Google Tag Manager throttle flag."""
|
||||||
|
container = f"UA-{rng.randint(10000000, 99999999)}-{rng.randint(1, 9)}"
|
||||||
|
return {"name": f"_dc_gtm_{container}",
|
||||||
|
"value": "1",
|
||||||
|
"domain": domain, "path": "/",
|
||||||
|
"expires": now + 60,
|
||||||
|
"secure": True, "sameSite": "Lax"}
|
||||||
|
|
||||||
|
|
||||||
|
def _hssrc_cookie(rng: random.Random, now: int, domain: str) -> dict:
|
||||||
|
"""HubSpot referrer flag — small int."""
|
||||||
|
return {"name": "__hssrc",
|
||||||
|
"value": str(rng.randint(1, 5)),
|
||||||
|
"domain": domain, "path": "/",
|
||||||
|
"expires": now + 1800,
|
||||||
|
"secure": True, "sameSite": "Lax"}
|
||||||
|
|
||||||
|
|
||||||
|
def _cookies_for_profile(profile: str, rng: random.Random,
                         now: int, domain: str) -> List[dict]:
    """Turn a ``cookie_profile`` tag into the concrete cookies for one site.

    Recipes:
        ``minimal`` (and any unknown tag): ``_ga`` only.
        ``ga_only``: ``_ga`` + ``_gid``, plus GTM with 30% chance.
        ``ga_cf``: ``_ga`` + ``__cf_bm``.
        ``ga_consent``: ``_ga`` + ``_gid`` + one consent-banner cookie
            (OneTrust or CookieYes, 50/50), plus GTM with 40% chance.
        ``ga_consent_clarity``: ``_ga`` + ``_gid`` + ``_clck`` + one
            consent-banner cookie, plus FB pixel (50%), GTM (40%),
            HubSpot (25%).

    All randomness comes from ``rng``, consumed in a fixed order, so the
    result is deterministic for a given ``rng`` state. ``domain`` is
    normalized to carry a leading dot.
    """
    dom = _norm_domain(domain)

    def consent_banner() -> dict:
        # The coin is flipped first; only the chosen generator consumes rng.
        if rng.random() < 0.5:
            return _onetrust_cookie(rng, now, dom)
        return _cookieyes_cookie(rng, now, dom)

    if profile == "ga_only":
        jar = [_ga_cookie(rng, now, dom), _gid_cookie(rng, now, dom)]
        # 30% chance of GTM helper paired with GA
        if rng.random() < 0.3:
            jar.append(_gtm_cookie(rng, now, dom))
        return jar

    if profile == "ga_cf":
        ga = _ga_cookie(rng, now, dom)
        return [ga, _cf_bm_cookie(rng, now, dom)]

    if profile == "ga_consent":
        jar = [_ga_cookie(rng, now, dom), _gid_cookie(rng, now, dom)]
        jar.append(consent_banner())
        if rng.random() < 0.4:
            jar.append(_gtm_cookie(rng, now, dom))
        return jar

    if profile == "ga_consent_clarity":
        # Heavy-tracking site profile: GA + Clarity + consent + often FB pixel
        jar = [_ga_cookie(rng, now, dom), _gid_cookie(rng, now, dom),
               _clarity_cookie(rng, now, dom)]
        jar.append(consent_banner())
        optional = (
            (0.5, _fbp_cookie),
            (0.4, _gtm_cookie),
            (0.25, _hssrc_cookie),
        )
        for chance, make in optional:
            if rng.random() < chance:
                jar.append(make(rng, now, dom))
        return jar

    # "minimal" and any unknown profile share the same safe single-cookie recipe.
    return [_ga_cookie(rng, now, dom)]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public builder
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_cookies(seed: int,
                  browsing_history: Optional[List[dict]] = None,
                  now: Optional[int] = None,
                  timezone: Optional[str] = None) -> List[dict]:
    """Assemble the complete cookie list for one persona.

    Args:
        seed: persona integer seed (from `Profile.seed`).
        browsing_history: list of site dicts; each must have a ``name`` key
            (used both as cookie domain and as PRNG sub-seed tag) and may have
            a ``cookie_profile`` key (defaults to ``"minimal"``). ``None`` or
            empty means only the 5 google cookies are returned.
        now: unix-seconds timestamp; defaults to the current time. Pin for tests.
        timezone: IANA tz used to derive the CONSENT cookie's `lang+region`
            token (e.g. "Europe/Rome" → "it+IT", "America/New_York" → "en+FX").

    Returns:
        The 5 ``.google.com`` cookies followed by the per-site cookies, in
        browsing-history order.
    """
    ts = int(time.time()) if now is None else now
    persona_seed = int(seed)

    # 5 .google.com cookies (always) — CONSENT lang derived from tz
    google_rng = random.Random(_sub_seed(persona_seed, "google"))
    jar: List[dict] = list(_google_cookies(google_rng, ts, timezone=timezone))

    # Per-site cookies: each site gets its own PRNG stream (seed × site name)
    for site in browsing_history or []:
        site_name = site["name"]
        site_rng = random.Random(_sub_seed(persona_seed, f"dom:{site_name}"))
        recipe = site.get("cookie_profile", "minimal")
        jar.extend(_cookies_for_profile(recipe, site_rng, ts, site_name))
    return jar
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_seed_and_history(profile: Any) -> tuple:
|
||||||
|
"""Accept a Profile object OR a (seed, history) tuple OR just an int seed."""
|
||||||
|
if isinstance(profile, int):
|
||||||
|
return int(profile), []
|
||||||
|
seed = int(getattr(profile, "seed"))
|
||||||
|
history = list(getattr(profile, "browsing_history", []) or [])
|
||||||
|
return seed, history
|
||||||
|
|
||||||
|
|
||||||
|
async def seed_recaptcha_cookies_async(context: Any, profile: Any,
                                       timezone: Optional[str] = None) -> None:
    """Async: inject deterministic persona cookies into the context.

    Args:
        context: object exposing an awaitable ``add_cookies(cookies)``.
        profile: anything accepted by ``_extract_seed_and_history``.
        timezone: IANA tz forwarded to ``build_cookies``.

    Seeding is best-effort: a failure in ``add_cookies`` never propagates to
    the caller, but it is logged at DEBUG level so it can be diagnosed.
    """
    import logging

    seed, history = _extract_seed_and_history(profile)
    cookies = build_cookies(seed, history, timezone=timezone)
    try:
        await context.add_cookies(cookies)
    except Exception as exc:
        # Deliberately non-fatal: the pre-seed is an optimisation, not a
        # requirement for the context to be usable.
        logging.getLogger(__name__).debug(
            "reCAPTCHA cookie pre-seed skipped: %r", exc
        )
|
||||||
|
|
||||||
|
|
||||||
|
def seed_recaptcha_cookies_sync(context: Any, profile: Any,
                                timezone: Optional[str] = None) -> None:
    """Sync: inject deterministic persona cookies into the context.

    Args:
        context: object exposing ``add_cookies(cookies)``.
        profile: anything accepted by ``_extract_seed_and_history``.
        timezone: IANA tz forwarded to ``build_cookies``.

    Seeding is best-effort: a failure in ``add_cookies`` never propagates to
    the caller, but it is logged at DEBUG level so it can be diagnosed.
    """
    import logging

    seed, history = _extract_seed_and_history(profile)
    cookies = build_cookies(seed, history, timezone=timezone)
    try:
        context.add_cookies(cookies)
    except Exception as exc:
        # Deliberately non-fatal: the pre-seed is an optimisation, not a
        # requirement for the context to be usable.
        logging.getLogger(__name__).debug(
            "reCAPTCHA cookie pre-seed skipped: %r", exc
        )
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"build_cookies",
|
||||||
|
"seed_recaptcha_cookies_async",
|
||||||
|
"seed_recaptcha_cookies_sync",
|
||||||
|
]
|
||||||
|
|
@ -9,6 +9,7 @@ from typing import Any, Dict, Optional, Union
|
||||||
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
||||||
|
|
||||||
from ._fpforge import Profile, generate_profile
|
from ._fpforge import Profile, generate_profile
|
||||||
|
from ._geo import resolve_session_timezone
|
||||||
from ._headless import make_virtual_display
|
from ._headless import make_virtual_display
|
||||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||||
from .download import ensure_binary
|
from .download import ensure_binary
|
||||||
|
|
@ -51,6 +52,7 @@ class InvisiblePlaywright:
|
||||||
extra_prefs: Optional[Dict[str, Any]] = None,
|
extra_prefs: Optional[Dict[str, Any]] = None,
|
||||||
binary_path: Optional[str] = None,
|
binary_path: Optional[str] = None,
|
||||||
profile_dir: Optional[Union[str, Path]] = None,
|
profile_dir: Optional[Union[str, Path]] = None,
|
||||||
|
prep_recaptcha: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
# See sync launcher: `zoom.stealth.fpp.hw_seed` is int32_t — clamp.
|
# See sync launcher: `zoom.stealth.fpp.hw_seed` is int32_t — clamp.
|
||||||
self.seed: int = int(seed) if seed is not None else secrets.randbits(31)
|
self.seed: int = int(seed) if seed is not None else secrets.randbits(31)
|
||||||
|
|
@ -64,6 +66,8 @@ class InvisiblePlaywright:
|
||||||
self._extra_prefs = extra_prefs
|
self._extra_prefs = extra_prefs
|
||||||
self._binary_path = binary_path
|
self._binary_path = binary_path
|
||||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||||
|
# reCAPTCHA pre-seed gated server-side; respect persistent profile.
|
||||||
|
self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None
|
||||||
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
||||||
self._pw: Optional[Playwright] = None
|
self._pw: Optional[Playwright] = None
|
||||||
self._browser: Optional[Browser] = None
|
self._browser: Optional[Browser] = None
|
||||||
|
|
@ -72,6 +76,13 @@ class InvisiblePlaywright:
|
||||||
|
|
||||||
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
||||||
import sys as _sys
|
import sys as _sys
|
||||||
|
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||||
|
# concrete IANA zone before anything reads self._timezone. Run the
|
||||||
|
# blocking geo lookup off the event loop. Fail-early if a proxy is set
|
||||||
|
# but the egress zone can't be resolved.
|
||||||
|
self._timezone = await asyncio.to_thread(
|
||||||
|
resolve_session_timezone, self._timezone, self._proxy
|
||||||
|
)
|
||||||
executable = self._binary_path or ensure_binary()
|
executable = self._binary_path or ensure_binary()
|
||||||
prefs = translate_profile_to_prefs(
|
prefs = translate_profile_to_prefs(
|
||||||
self._profile,
|
self._profile,
|
||||||
|
|
@ -124,12 +135,18 @@ class InvisiblePlaywright:
|
||||||
def _patch_new_context_defaults(self, browser: Browser) -> None:
|
def _patch_new_context_defaults(self, browser: Browser) -> None:
|
||||||
original = browser.new_context
|
original = browser.new_context
|
||||||
defaults = self._default_context_kwargs()
|
defaults = self._default_context_kwargs()
|
||||||
|
prep = self._prep_recaptcha
|
||||||
|
profile = self._profile # pass the whole Profile (seed + browsing_history)
|
||||||
|
tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region
|
||||||
|
|
||||||
async def patched(**kw):
|
async def patched(**kw):
|
||||||
merged = dict(defaults)
|
merged = dict(defaults)
|
||||||
merged.update(kw)
|
merged.update(kw)
|
||||||
ctx = await original(**merged)
|
ctx = await original(**merged)
|
||||||
_patch_new_page_sleep(ctx)
|
_patch_new_page_sleep(ctx)
|
||||||
|
if prep:
|
||||||
|
from ._recaptcha_seed import seed_recaptcha_cookies_async
|
||||||
|
await seed_recaptcha_cookies_async(ctx, profile, timezone=tz)
|
||||||
return ctx
|
return ctx
|
||||||
|
|
||||||
browser.new_context = patched # type: ignore[assignment]
|
browser.new_context = patched # type: ignore[assignment]
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,15 @@ from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
||||||
from .download import cache_root, ensure_binary
|
from .download import cache_root, ensure_binary
|
||||||
|
|
||||||
|
|
||||||
def _cmd_fetch(_args: argparse.Namespace) -> int:
|
def _cmd_fetch(args: argparse.Namespace) -> int:
|
||||||
|
# --force: re-download even if already cached (drop the cached version dir,
|
||||||
|
# then let ensure_binary fetch it fresh). Useful to recover a corrupted cache
|
||||||
|
# or re-pull after a re-published release.
|
||||||
|
if getattr(args, "force", False):
|
||||||
|
from .download import cache_dir_for_version
|
||||||
|
d = cache_dir_for_version()
|
||||||
|
if d.exists():
|
||||||
|
shutil.rmtree(d, ignore_errors=True)
|
||||||
path = ensure_binary()
|
path = ensure_binary()
|
||||||
print(path)
|
print(path)
|
||||||
return 0
|
return 0
|
||||||
|
|
@ -44,9 +52,17 @@ def _cmd_clear_cache(_args: argparse.Namespace) -> int:
|
||||||
|
|
||||||
def build_parser() -> argparse.ArgumentParser:
|
def build_parser() -> argparse.ArgumentParser:
|
||||||
p = argparse.ArgumentParser(prog="invisible-playwright", description="invisible_playwright CLI")
|
p = argparse.ArgumentParser(prog="invisible-playwright", description="invisible_playwright CLI")
|
||||||
sub = p.add_subparsers(dest="cmd", required=True)
|
# Top-level `--version` / `-V` flag so `python -m invisible_playwright --version`
|
||||||
|
# works (Python convention), in addition to the existing `version` subcommand.
|
||||||
|
p.add_argument(
|
||||||
|
"-V", "--version", action="version",
|
||||||
|
version=f"invisible_playwright {__version__} (BINARY_VERSION={BINARY_VERSION}, Firefox {FIREFOX_UPSTREAM_VERSION})",
|
||||||
|
)
|
||||||
|
sub = p.add_subparsers(dest="cmd")
|
||||||
|
|
||||||
sub.add_parser("fetch", help="download the patched Firefox binary")
|
fetch_p = sub.add_parser("fetch", help="download the patched Firefox binary")
|
||||||
|
fetch_p.add_argument("--force", action="store_true",
|
||||||
|
help="re-download even if already cached")
|
||||||
sub.add_parser("path", help="print the absolute path to the cached binary")
|
sub.add_parser("path", help="print the absolute path to the cached binary")
|
||||||
sub.add_parser("version", help="print wrapper and binary versions")
|
sub.add_parser("version", help="print wrapper and binary versions")
|
||||||
sub.add_parser("clear-cache", help="remove all cached binaries")
|
sub.add_parser("clear-cache", help="remove all cached binaries")
|
||||||
|
|
@ -54,7 +70,15 @@ def build_parser() -> argparse.ArgumentParser:
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str] | None = None) -> int:
|
def main(argv: list[str] | None = None) -> int:
|
||||||
args = build_parser().parse_args(argv)
|
parser = build_parser()
|
||||||
|
args = parser.parse_args(argv)
|
||||||
|
if args.cmd is None:
|
||||||
|
# argparse-conventional: print usage + error message to stderr, exit 2.
|
||||||
|
# We can't keep `required=True` on the subparsers because that breaks
|
||||||
|
# the top-level `--version` flag (argparse demands a subcommand even
|
||||||
|
# when --version is the only token). parser.error() preserves the
|
||||||
|
# original "no subcommand" exit semantics tests expect.
|
||||||
|
parser.error("a subcommand is required (try --help, --version, or one of: fetch, path, version, clear-cache)")
|
||||||
dispatch = {
|
dispatch = {
|
||||||
"fetch": _cmd_fetch,
|
"fetch": _cmd_fetch,
|
||||||
"path": _cmd_path,
|
"path": _cmd_path,
|
||||||
|
|
|
||||||
110
src/invisible_playwright/config.py
Normal file
110
src/invisible_playwright/config.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
"""Public helpers for building Firefox launch config without using ``InvisiblePlaywright``.
|
||||||
|
|
||||||
|
Use these when you need to call ``playwright.firefox.launch()`` (or
|
||||||
|
``firefox.launch_persistent_context()``) directly with our patched binary
|
||||||
|
and stealth prefs, instead of using the ``InvisiblePlaywright`` context
|
||||||
|
manager.
|
||||||
|
|
||||||
|
Typical caller is an external integration that owns its own browser
|
||||||
|
lifecycle (a Crawlee/Skyvern/changedetection-style fetcher, a Playwright
|
||||||
|
Server wrapper, a multi-language harness) and just wants the building
|
||||||
|
blocks::
|
||||||
|
|
||||||
|
from playwright.async_api import async_playwright
|
||||||
|
from invisible_playwright import ensure_binary, get_default_stealth_prefs
|
||||||
|
|
||||||
|
async with async_playwright() as p:
|
||||||
|
browser = await p.firefox.launch(
|
||||||
|
executable_path=str(ensure_binary()),
|
||||||
|
firefox_user_prefs=get_default_stealth_prefs(seed=42),
|
||||||
|
)
|
||||||
|
|
||||||
|
For everyday Python usage the ``InvisiblePlaywright`` context manager is
|
||||||
|
still the recommended entry point; these helpers expose the same internals
|
||||||
|
without the lifecycle ownership.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
When calling ``firefox.launch()`` yourself, pass ``headless=False`` and
|
||||||
|
manage the display hiding (Xvfb on Linux, hidden desktop on Windows)
|
||||||
|
externally. Passing ``headless=True`` directly to Playwright puts
|
||||||
|
Firefox in true headless mode, which skips the real rendering pipeline
|
||||||
|
and breaks canvas / audio / WebGL fingerprint coherence. The
|
||||||
|
``InvisiblePlaywright`` context manager does this translation
|
||||||
|
automatically; the public helpers leave it to the caller.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import secrets
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
from ._fpforge import generate_profile
|
||||||
|
from .prefs import translate_profile_to_prefs
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_stealth_prefs(
    seed: Optional[int] = None,
    *,
    pin: Optional[Dict[str, Any]] = None,
    locale: str = "en-US",
    timezone: str = "",
    extra_prefs: Optional[Dict[str, Any]] = None,
    humanize: Union[bool, float] = True,
    virtual_display: bool = False,
) -> Dict[str, Any]:
    """Produce the full ``firefox_user_prefs`` dict for ``firefox.launch()``.

    These are the prefs ``InvisiblePlaywright(seed=..., locale=...,
    timezone=..., extra_prefs=..., humanize=...)`` would inject; call this
    when you drive ``playwright.firefox.launch()`` yourself.

    Args:
        seed: Integer seed for the Bayesian fingerprint sampler; the same
            seed yields the same fingerprint. ``None`` draws a fresh random
            int31 (matches the ``InvisiblePlaywright`` default).
        pin: Optional dict forcing specific fingerprint fields while the
            rest stays seed-derived. See ``docs/pinning.md``.
        locale: BCP-47 tag (e.g. ``"en-US"``). Drives ``Accept-Language``
            and ``navigator.language``.
        timezone: IANA timezone (e.g. ``"America/New_York"``). Empty means
            use the host TZ. This pure pref builder does NOT resolve
            ``"auto"`` (that needs the proxy + a network lookup at launch
            time) — pass a concrete zone here, or use ``InvisiblePlaywright``
            / ``resolve_session_timezone(timezone, proxy)`` for ``"auto"``.
        extra_prefs: Optional dict overlaid LAST onto the generated prefs.
        humanize: When True (default), every mouse move is expanded into a
            Bezier trajectory by the patched Juggler, capped at 1.5 seconds.
            A float sets that cap in seconds. A falsy value disables the
            behavior.
        virtual_display: When True on Windows, apply GPU-disabling prefs to
            prevent GPU process crashes on virtual desktops without a D3D11
            backend.

    Returns:
        Dict ready to pass as ``firefox_user_prefs=`` to
        ``playwright.firefox.launch()`` or ``launch_persistent_context()``.
    """
    if seed is None:
        resolved_seed = secrets.randbits(31)
    else:
        resolved_seed = int(seed)

    prefs = translate_profile_to_prefs(
        generate_profile(resolved_seed, pin=pin),
        locale=locale,
        timezone=timezone,
        extra_prefs=extra_prefs,
        virtual_display=virtual_display,
    )

    prefs["invisible_playwright.humanize"] = bool(humanize)
    if not humanize:
        return prefs

    # A bare True selects the default cap; any other truthy value is the cap.
    cap_seconds = 1.5 if isinstance(humanize, bool) else float(humanize)
    prefs["invisible_playwright.humanize.maxTime"] = str(cap_seconds)
    return prefs
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_args() -> List[str]:
    """Return the default Firefox CLI args to pass via ``args=``.

    The list is currently empty, because all stealth configuration is
    delivered through ``firefox_user_prefs`` rather than CLI flags. It is
    exposed for parity with the
    ``cloakbrowser.config.get_default_stealth_args`` pattern and to
    future-proof integrations that already wire
    ``args=[*existing, *get_default_args()]``.
    """
    default_args: List[str] = []
    return default_args
|
||||||
|
|
@ -7,7 +7,14 @@ bugfixes don't force a multi-hour Firefox rebuild.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
# Bump this when a new patched Firefox build is released on GitHub.
|
# Bump this when a new patched Firefox build is released on GitHub.
|
||||||
BINARY_VERSION: str = "firefox-5"
|
BINARY_VERSION: str = "firefox-9"
|
||||||
|
|
||||||
|
# Releases known to be broken — ensure_binary() refuses them with a clear error
|
||||||
|
# instead of handing the user an unusable binary. firefox-8 was packaged without
|
||||||
|
# the juggler automation layer, so Playwright cannot drive it (TargetClosedError);
|
||||||
|
# fixed in firefox-9 (package-manifest.in now ships chrome/juggler). A cached
|
||||||
|
# firefox-8 from before the bump would otherwise keep being used silently.
|
||||||
|
BROKEN_VERSIONS: frozenset[str] = frozenset({"firefox-8"})
|
||||||
|
|
||||||
# Underlying Firefox version (for display only; does not drive downloads).
|
# Underlying Firefox version (for display only; does not drive downloads).
|
||||||
FIREFOX_UPSTREAM_VERSION: str = "150.0.1"
|
FIREFOX_UPSTREAM_VERSION: str = "150.0.1"
|
||||||
|
|
@ -19,13 +26,15 @@ BINARY_BASENAME: str = f"firefox-{FIREFOX_UPSTREAM_VERSION}-stealth"
|
||||||
def ARCHIVE_NAME(platform_key: str, machine: str) -> str:
|
def ARCHIVE_NAME(platform_key: str, machine: str) -> str:
|
||||||
"""Return the platform-specific archive filename.
|
"""Return the platform-specific archive filename.
|
||||||
|
|
||||||
platform_key: sys.platform ("win32", "linux")
|
platform_key: sys.platform ("win32", "linux", "darwin")
|
||||||
machine: platform.machine() ("AMD64", "x86_64", ...)
|
machine: platform.machine() ("AMD64", "x86_64", "arm64", "aarch64", ...)
|
||||||
"""
|
"""
|
||||||
pk = platform_key.lower()
|
pk = platform_key.lower()
|
||||||
m = machine.lower()
|
m = machine.lower()
|
||||||
if m in {"amd64", "x86_64"}:
|
if m in {"amd64", "x86_64"}:
|
||||||
arch = "x86_64"
|
arch = "x86_64"
|
||||||
|
elif m in {"arm64", "aarch64"}:
|
||||||
|
arch = "arm64"
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"unsupported arch: {machine}")
|
raise NotImplementedError(f"unsupported arch: {machine}")
|
||||||
|
|
||||||
|
|
@ -33,16 +42,39 @@ def ARCHIVE_NAME(platform_key: str, machine: str) -> str:
|
||||||
return f"{BINARY_BASENAME}-win-{arch}.zip"
|
return f"{BINARY_BASENAME}-win-{arch}.zip"
|
||||||
if pk == "linux":
|
if pk == "linux":
|
||||||
return f"{BINARY_BASENAME}-linux-{arch}.tar.gz"
|
return f"{BINARY_BASENAME}-linux-{arch}.tar.gz"
|
||||||
|
if pk == "darwin":
|
||||||
|
return f"{BINARY_BASENAME}-macos-{arch}.tar.gz"
|
||||||
raise NotImplementedError(f"unsupported platform: {platform_key}")
|
raise NotImplementedError(f"unsupported platform: {platform_key}")
|
||||||
|
|
||||||
|
|
||||||
# Binary entry point relative path inside the extracted archive root.
|
# Binary entry point relative path inside the extracted archive root.
|
||||||
|
# macOS ships the .app bundle (renamed to a stable "Firefox.app" by release.yml);
|
||||||
|
# the wrapper execs the inner binary directly, which sidesteps Gatekeeper.
|
||||||
BINARY_ENTRY_REL = {
|
BINARY_ENTRY_REL = {
|
||||||
"win32": "firefox.exe",
|
"win32": "firefox.exe",
|
||||||
"linux": "firefox",
|
"linux": "firefox",
|
||||||
|
"darwin": "Firefox.app/Contents/MacOS/firefox",
|
||||||
}
|
}
|
||||||
|
|
||||||
# GitHub release URL template. The "TODO" owner is resolved at publication time.
|
# GitHub release URL template. The "TODO" owner is resolved at publication time.
|
||||||
RELEASE_URL_TEMPLATE = (
|
RELEASE_URL_TEMPLATE = (
|
||||||
"https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}"
|
"https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# GeoIP database (timezone="auto" → resolve IANA zone from proxy egress IP)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# daijro/geoip-all-in-one merges IP2Location LITE + GeoLite2 + DB-IP into a
|
||||||
|
# single mmdb (country ISO + coordinates + IANA timezone via tzfpy), rebuilt
|
||||||
|
# weekly. GPL-3.0, so we DOWNLOAD it at runtime into the user cache (like the
|
||||||
|
# Firefox binary) rather than bundling it into this MIT package. The `-all`
|
||||||
|
# variant covers IPv4+IPv6. download.py tracks the LATEST release and refreshes
|
||||||
|
# weekly; GEOIP_MMDB_VERSION is only the cold-cache fallback when the GitHub
|
||||||
|
# API is unreachable on a machine that has never downloaded the DB.
|
||||||
|
GEOIP_REPO: str = "daijro/geoip-all-in-one"
|
||||||
|
GEOIP_MMDB_VERSION: str = "2026.06.03"
|
||||||
|
GEOIP_ASSET: str = "geoip-aio-all.mmdb.zip"
|
||||||
|
GEOIP_MMDB_NAME: str = "geoip-aio-all.mmdb"
|
||||||
|
GEOIP_RELEASE_URL_TEMPLATE: str = (
|
||||||
|
"https://github.com/daijro/geoip-all-in-one/releases/download/{tag}/{asset}"
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,12 @@ import hashlib
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tarfile
|
import tarfile
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -18,6 +21,11 @@ from .constants import (
|
||||||
ARCHIVE_NAME,
|
ARCHIVE_NAME,
|
||||||
BINARY_ENTRY_REL,
|
BINARY_ENTRY_REL,
|
||||||
BINARY_VERSION,
|
BINARY_VERSION,
|
||||||
|
BROKEN_VERSIONS,
|
||||||
|
GEOIP_ASSET,
|
||||||
|
GEOIP_MMDB_NAME,
|
||||||
|
GEOIP_MMDB_VERSION,
|
||||||
|
GEOIP_RELEASE_URL_TEMPLATE,
|
||||||
RELEASE_URL_TEMPLATE,
|
RELEASE_URL_TEMPLATE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -114,8 +122,39 @@ def _extract(archive: Path, dst: Path) -> None:
|
||||||
raise RuntimeError(f"unknown archive format: {archive}")
|
raise RuntimeError(f"unknown archive format: {archive}")
|
||||||
|
|
||||||
|
|
||||||
|
def _post_extract_darwin(app_root: Path, entry: Path) -> None:
|
||||||
|
"""Make an ad-hoc-signed .app launchable on macOS.
|
||||||
|
|
||||||
|
The .app is downloaded via requests (no Finder quarantine attached), but we
|
||||||
|
strip com.apple.quarantine defensively and ensure the inner binary is
|
||||||
|
executable. We exec the inner binary directly (not via LaunchServices), so
|
||||||
|
Gatekeeper's first-launch prompt does not apply; the ad-hoc signature
|
||||||
|
(applied in release.yml) is what lets the arm64 Mach-O run at all.
|
||||||
|
"""
|
||||||
|
app = app_root
|
||||||
|
# walk up to the .app bundle dir if entry points inside it
|
||||||
|
for parent in entry.parents:
|
||||||
|
if parent.name.endswith(".app"):
|
||||||
|
app = parent
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
subprocess.run(["xattr", "-dr", "com.apple.quarantine", str(app)], check=False)
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
entry.chmod(0o755)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def ensure_binary(version: str = BINARY_VERSION) -> Path:
|
def ensure_binary(version: str = BINARY_VERSION) -> Path:
|
||||||
"""Return a path to a runnable Firefox executable. Download if needed."""
|
"""Return a path to a runnable Firefox executable. Download if needed."""
|
||||||
|
if version in BROKEN_VERSIONS:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"{version} is a known-broken release (the juggler automation layer is "
|
||||||
|
f"missing, so Playwright cannot drive it). Upgrade invisible_playwright "
|
||||||
|
f"(current BINARY_VERSION={BINARY_VERSION}) or pass a newer version."
|
||||||
|
)
|
||||||
plat = sys.platform
|
plat = sys.platform
|
||||||
mach = platform.machine()
|
mach = platform.machine()
|
||||||
asset = ARCHIVE_NAME(plat, mach)
|
asset = ARCHIVE_NAME(plat, mach)
|
||||||
|
|
@ -148,6 +187,142 @@ def ensure_binary(version: str = BINARY_VERSION) -> Path:
|
||||||
)
|
)
|
||||||
_extract(archive_path, version_dir)
|
_extract(archive_path, version_dir)
|
||||||
|
|
||||||
|
if plat == "darwin":
|
||||||
|
_post_extract_darwin(version_dir, entry)
|
||||||
|
|
||||||
if not entry.exists():
|
if not entry.exists():
|
||||||
raise RuntimeError(f"binary not found after extraction: {entry}")
|
raise RuntimeError(f"binary not found after extraction: {entry}")
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# GeoIP mmdb (timezone="auto" → map egress IP → IANA zone)
|
||||||
|
#
|
||||||
|
# daijro/geoip-all-in-one is rebuilt WEEKLY, so we don't pin a tag. We cache
|
||||||
|
# the latest mmdb and, once it's older than GEOIP_REFRESH_DAYS, re-check the
|
||||||
|
# latest release and pull a newer build if one exists. Net effect: no download
|
||||||
|
# (not even an API call) on a launch within the window; auto-refresh after it;
|
||||||
|
# a stale cache is reused when offline rather than breaking the launch.
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
GEOIP_REFRESH_DAYS = 7 # matches daijro's weekly rebuild cadence
|
||||||
|
|
||||||
|
|
||||||
|
def _geoip_root() -> Path:
    """Return the ``geoip`` directory located under the cache root."""
    return cache_root().joinpath("geoip")
|
||||||
|
|
||||||
|
|
||||||
|
def _geoip_check_marker() -> Path:
    """Return the path of the ``.last_check`` marker inside the geoip directory."""
    geoip_dir = _geoip_root()
    return geoip_dir / ".last_check"
|
||||||
|
|
||||||
|
|
||||||
|
def _cached_geoip_mmdb() -> Path | None:
    """Return the cached ``*.mmdb`` whose path sorts last, or ``None``.

    Files are looked up one level below the geoip directory
    (``<tag>/<file>.mmdb``). Tag directories are date strings such as
    ``2026.06.03``, so the lexically greatest path is the newest build.
    """
    geoip_dir = _geoip_root()
    if geoip_dir.exists():
        # max() over the paths picks the same element as sorted(...)[-1].
        return max(geoip_dir.glob("*/*.mmdb"), default=None)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _geoip_cache_fresh(max_age_days: int) -> bool:
    """Tell whether the last release check happened within ``max_age_days``.

    The age is measured from the modification time of the check marker file.

    Args:
        max_age_days: Size of the freshness window, in days.

    Returns:
        ``True`` when the marker exists and its age is inside the window;
        ``False`` when the marker is missing or unreadable, or when its
        timestamp lies in the future.
    """
    try:
        # Single stat() instead of exists()+stat(): no window in which the
        # marker can vanish between the two calls.
        checked_at = _geoip_check_marker().stat().st_mtime
    except OSError:
        return False
    age_seconds = time.time() - checked_at
    # A negative age means the marker is dated in the future (clock change,
    # restored cache). Treat it as stale so refreshes are not suppressed.
    return 0 <= age_seconds < max_age_days * 86400
|
||||||
|
|
||||||
|
|
||||||
|
def _touch_geoip_marker() -> None:
    """Create or refresh the check marker, creating its directory if needed."""
    marker = _geoip_check_marker()
    marker.parent.mkdir(parents=True, exist_ok=True)
    marker.touch()
|
||||||
|
|
||||||
|
|
||||||
|
def _latest_geoip_tag() -> str:
    """Return the latest release tag of ``GEOIP_REPO`` via the GitHub API.

    A token from ``_github_token()`` is sent in the ``Authorization`` header
    when one is available.

    Returns:
        The ``tag_name`` of the latest release.

    Raises:
        requests.HTTPError: The API answered with an error status.
        RuntimeError: The response has no usable ``tag_name``, or the tag is
            not safe to use as a single path component.
    """
    headers = {"Accept": "application/vnd.github+json"}
    token = _github_token()
    if token:
        headers["Authorization"] = f"token {token}"
    r = requests.get(
        f"https://api.github.com/repos/{GEOIP_REPO}/releases/latest",
        headers=headers, timeout=15,
    )
    r.raise_for_status()
    payload = r.json()
    # Guard against a non-object JSON body instead of failing with AttributeError.
    tag = payload.get("tag_name") if isinstance(payload, dict) else None
    if not tag:
        raise RuntimeError("no tag_name in geoip-all-in-one latest release")
    # The tag becomes a cache directory name and part of the download URL, so
    # reject separators, "..", absolute paths and non-string values.
    if not isinstance(tag, str) or not re.fullmatch(r"[A-Za-z0-9][A-Za-z0-9._-]*", tag):
        raise RuntimeError(f"unexpected geoip-all-in-one release tag: {tag!r}")
    return tag
|
||||||
|
|
||||||
|
|
||||||
|
def _download_geoip_tag(tag: str) -> Path:
    """Return the mmdb for release ``tag``, downloading it if not cached.

    The archive is downloaded and extracted in a temporary directory. Only
    the finished mmdb is then moved into ``<geoip root>/<tag>/`` with an
    atomic rename, so an interrupted download or extraction can never leave
    a truncated file that a later call would mistake for a valid cache entry.

    Args:
        tag: Release tag to fetch; also the name of the cache sub-directory.

    Returns:
        Path to the mmdb inside the tag's cache directory.

    Raises:
        RuntimeError: The archive contained no ``*.mmdb`` file.
    """
    dst_dir = _geoip_root() / tag
    target = dst_dir / GEOIP_MMDB_NAME
    if target.exists():
        return target
    # The file inside the zip may not be named GEOIP_MMDB_NAME. Reuse whatever
    # mmdb is already cached for this tag rather than downloading it again.
    already = sorted(dst_dir.glob("*.mmdb"))
    if already:
        return already[0]

    url = GEOIP_RELEASE_URL_TEMPLATE.format(tag=tag, asset=GEOIP_ASSET)
    dst_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as td:
        archive = Path(td) / GEOIP_ASSET
        staging = Path(td) / "extracted"
        staging.mkdir()
        _download_file(url, archive)
        _extract(archive, staging)

        source = staging / GEOIP_MMDB_NAME
        if not source.exists():
            found = sorted(staging.glob("*.mmdb"))
            if not found:
                raise RuntimeError(f"geoip mmdb not found after extraction in {dst_dir}")
            source = found[0]

        final = dst_dir / source.name
        # ".part" does not match the "*.mmdb" globs, so a leftover partial
        # copy is never picked up as a cache hit.
        partial = dst_dir / (source.name + ".part")
        try:
            shutil.copyfile(source, partial)
            os.replace(partial, final)  # atomic within the same directory
        finally:
            partial.unlink(missing_ok=True)
    return final
|
||||||
|
|
||||||
|
|
||||||
|
def _prune_old_geoip_tags(keep: str) -> None:
    """Delete every sub-directory of the geoip directory not named ``keep``.

    Plain files in the geoip directory are left alone, and removal errors
    are ignored.
    """
    geoip_dir = _geoip_root()
    if not geoip_dir.exists():
        return
    stale_dirs = [
        entry for entry in geoip_dir.iterdir()
        if entry.is_dir() and entry.name != keep
    ]
    for entry in stale_dirs:
        shutil.rmtree(entry, ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
|
def geoip_mmdb_path() -> Path | None:
    """Public accessor for the cached mmdb: newest tag's file, or ``None``.

    Never touches the network; it only reports what is already on disk.
    """
    newest = _cached_geoip_mmdb()
    return newest
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_geoip_mmdb(max_age_days: int = GEOIP_REFRESH_DAYS) -> Path:
    """Return a geoip mmdb, kept fresh against the upstream rebuild cadence.

    Resolution order:
      1. ``STEALTHFOX_GEOIP_MMDB`` env → use that file (user-supplied / test).
      2. A cached mmdb whose last check is younger than ``max_age_days`` →
         use it (no network).
      3. Else ask GitHub for the latest tag, download it if not already
         cached, prune older tags, and reset the freshness timer.
      4. If the tag lookup OR the download fails but a cached mmdb exists →
         use it (and reset the timer so we don't hammer the API while
         offline).
      5. Cold cache and step 3 failed → fall back to the pinned
         ``GEOIP_MMDB_VERSION``; if that download also fails, raise.

    Args:
        max_age_days: Freshness window for the cached copy, in days.

    Returns:
        Path to a usable mmdb file.

    Raises:
        RuntimeError: ``STEALTHFOX_GEOIP_MMDB`` points to a missing file.
        Exception: Whatever the pinned-version download raises when there is
            no cached copy to fall back on.
    """
    override = os.environ.get("STEALTHFOX_GEOIP_MMDB")
    if override:
        p = Path(override)
        if not p.exists():
            raise RuntimeError(f"STEALTHFOX_GEOIP_MMDB points to a missing file: {p}")
        return p

    cached = _cached_geoip_mmdb()
    if cached and _geoip_cache_fresh(max_age_days):
        return cached

    try:
        # The lookup and the download share one try block: a failure in either
        # must fall back to the cache rather than break the launch.
        mmdb = _download_geoip_tag(_latest_geoip_tag())
    except Exception:
        if cached:
            _touch_geoip_marker()  # recheck after the window; don't hammer
            return cached
        # Cold cache and the latest release is unreachable → pinned fallback.
        mmdb = _download_geoip_tag(GEOIP_MMDB_VERSION)

    _prune_old_geoip_tags(mmdb.parent.name)
    _touch_geoip_marker()
    return mmdb
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from typing import Any, Dict, Optional, Union
|
||||||
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
||||||
|
|
||||||
from ._fpforge import Profile, generate_profile
|
from ._fpforge import Profile, generate_profile
|
||||||
|
from ._geo import resolve_session_timezone
|
||||||
from ._headless import make_virtual_display
|
from ._headless import make_virtual_display
|
||||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||||
from .download import ensure_binary
|
from .download import ensure_binary
|
||||||
|
|
@ -113,6 +114,7 @@ class InvisiblePlaywright:
|
||||||
extra_prefs: Optional[Dict[str, Any]] = None,
|
extra_prefs: Optional[Dict[str, Any]] = None,
|
||||||
binary_path: Optional[str] = None,
|
binary_path: Optional[str] = None,
|
||||||
profile_dir: Optional[Union[str, Path]] = None,
|
profile_dir: Optional[Union[str, Path]] = None,
|
||||||
|
prep_recaptcha: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -134,8 +136,14 @@ class InvisiblePlaywright:
|
||||||
a float caps the motion in seconds.
|
a float caps the motion in seconds.
|
||||||
locale: BCP-47 tag (e.g. ``"en-US"``). Drives the
|
locale: BCP-47 tag (e.g. ``"en-US"``). Drives the
|
||||||
``Accept-Language`` header and ``navigator.language``.
|
``Accept-Language`` header and ``navigator.language``.
|
||||||
timezone: IANA timezone (e.g. ``"America/New_York"``). Empty
|
timezone: IANA zone (e.g. ``"America/New_York"``) — used as-is
|
||||||
means use the host TZ.
|
when set, the only way to force a specific zone. ``""``
|
||||||
|
(default) or ``"auto"`` ALWAYS resolves from the egress IP:
|
||||||
|
through the proxy when one is set, otherwise from the host's
|
||||||
|
own public IP (one lookup + an offline mmdb). On failure: with
|
||||||
|
a proxy it raises (a foreign proxy on the host TZ is the
|
||||||
|
``timezone_mismatch`` signal); without a proxy it falls back to
|
||||||
|
the host TZ so a transient lookup failure can't break launch.
|
||||||
extra_prefs: Optional dict of Firefox prefs overlayed on top
|
extra_prefs: Optional dict of Firefox prefs overlayed on top
|
||||||
of the generated profile — useful for niche tweaks
|
of the generated profile — useful for niche tweaks
|
||||||
without monkey-patching the package.
|
without monkey-patching the package.
|
||||||
|
|
@ -166,6 +174,10 @@ class InvisiblePlaywright:
|
||||||
self._extra_prefs = extra_prefs
|
self._extra_prefs = extra_prefs
|
||||||
self._binary_path = binary_path
|
self._binary_path = binary_path
|
||||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||||
|
# reCAPTCHA cookie pre-seed — opt-in. Gated server-side: if a
|
||||||
|
# persistent profile_dir is in use, respect its existing cookies
|
||||||
|
# and DON'T enable pre-seed (the profile owns its own state).
|
||||||
|
self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None
|
||||||
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
self._profile: Profile = generate_profile(self.seed, pin=self._pin)
|
||||||
self._pw: Optional[Playwright] = None
|
self._pw: Optional[Playwright] = None
|
||||||
self._browser: Optional[Browser] = None
|
self._browser: Optional[Browser] = None
|
||||||
|
|
@ -173,6 +185,10 @@ class InvisiblePlaywright:
|
||||||
self._virtual_display: Any = None
|
self._virtual_display: Any = None
|
||||||
|
|
||||||
def __enter__(self) -> Union[Browser, BrowserContext]:
|
def __enter__(self) -> Union[Browser, BrowserContext]:
|
||||||
|
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||||
|
# concrete IANA zone before anything reads self._timezone. Fail-early
|
||||||
|
# if a proxy is set but the egress zone can't be resolved.
|
||||||
|
self._timezone = resolve_session_timezone(self._timezone, self._proxy)
|
||||||
executable = self._binary_path or ensure_binary()
|
executable = self._binary_path or ensure_binary()
|
||||||
prefs = self._build_prefs()
|
prefs = self._build_prefs()
|
||||||
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
||||||
|
|
@ -240,12 +256,18 @@ class InvisiblePlaywright:
|
||||||
"""
|
"""
|
||||||
original = browser.new_context
|
original = browser.new_context
|
||||||
defaults = self._default_context_kwargs()
|
defaults = self._default_context_kwargs()
|
||||||
|
prep = self._prep_recaptcha
|
||||||
|
profile = self._profile # pass the whole Profile (seed + browsing_history)
|
||||||
|
tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region
|
||||||
|
|
||||||
def patched(**kw):
|
def patched(**kw):
|
||||||
merged = dict(defaults)
|
merged = dict(defaults)
|
||||||
merged.update(kw) # user-supplied wins
|
merged.update(kw) # user-supplied wins
|
||||||
ctx = original(**merged)
|
ctx = original(**merged)
|
||||||
_patch_sync_new_page_sleep(ctx)
|
_patch_sync_new_page_sleep(ctx)
|
||||||
|
if prep:
|
||||||
|
from ._recaptcha_seed import seed_recaptcha_cookies_sync
|
||||||
|
seed_recaptcha_cookies_sync(ctx, profile, timezone=tz)
|
||||||
return ctx
|
return ctx
|
||||||
|
|
||||||
browser.new_context = patched # type: ignore[assignment]
|
browser.new_context = patched # type: ignore[assignment]
|
||||||
|
|
|
||||||
|
|
@ -289,13 +289,29 @@ _BASELINE: Dict[str, Any] = {
|
||||||
"network.dns.echconfig.enabled": False,
|
"network.dns.echconfig.enabled": False,
|
||||||
"network.dns.use_https_rr_as_altsvc": False,
|
"network.dns.use_https_rr_as_altsvc": False,
|
||||||
|
|
||||||
# === A/B VARIANT B: Fission disabled ===
|
# === Fission / site-isolation disabled (FF146 Playwright parity) ===
|
||||||
# Force single content-process model (e10s only, no BC outer/inner split).
|
# Force a single content-process model. Three knobs are required in FF150:
|
||||||
# Diagnostic for the FF150 BC-swap theory: if peet_ws/fppro/sannysoft
|
# upstream Playwright Firefox (FF146-based) only needed fission.autostart=False
|
||||||
# work with this off, the Juggler FF146 baseline breaks specifically on
|
# because FF146's default isolation strategy was looser. FF150 ships with
|
||||||
# cross-process navigation tracking.
|
# fission.webContentIsolationStrategy=1 (IsolateEverything) which still
|
||||||
|
# site-isolates cross-origin iframes into separate `webIsolated` content
|
||||||
|
# processes EVEN WHEN fission.autostart is False. From the parent process's
|
||||||
|
# point of view, those iframes get a Juggler Frame placeholder with no
|
||||||
|
# docShell, no URL, and an execution context that wraps the wrong global,
|
||||||
|
# so frame.evaluate() fails with cross-origin SOP errors and
|
||||||
|
# element_handle.content_frame() returns None.
|
||||||
|
#
|
||||||
|
# Pinning the strategy to 0 keeps every cross-origin web iframe in the
|
||||||
|
# parent's content process, where the Juggler code paths from the FF146
|
||||||
|
# era expect them. processCount.webIsolated=1 is kept as belt-and-suspenders
|
||||||
|
# in case some path still classifies an origin as webIsolated despite the
|
||||||
|
# strategy change. It costs nothing to leave.
|
||||||
|
#
|
||||||
|
# See issue #20 + tests/test_cross_origin_iframe.py for the regression
|
||||||
|
# sentinel that catches a future A/B flipping these back.
|
||||||
"fission.autostart": False,
|
"fission.autostart": False,
|
||||||
"fission.autostart.session": False,
|
"fission.autostart.session": False,
|
||||||
|
"fission.webContentIsolationStrategy": 0, # IsolateNothing
|
||||||
"dom.ipc.processCount.webIsolated": 1,
|
"dom.ipc.processCount.webIsolated": 1,
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -385,19 +401,19 @@ _WIN_VIRT_DESKTOP_WORKAROUNDS: Dict[str, Any] = {
|
||||||
# restores hardware compositor + functional WebGL on alt desktops.
|
# restores hardware compositor + functional WebGL on alt desktops.
|
||||||
"security.sandbox.gpu.level": 0,
|
"security.sandbox.gpu.level": 0,
|
||||||
# Same root cause as above, content process side. Wrapper repo issue #18
|
# Same root cause as above, content process side. Wrapper repo issue #18
|
||||||
# (id.sky.com tab crash). Sandbox content level > 4 puts content processes
|
# (tab crash on cross-process navigation under headless=True). Sandbox
|
||||||
# on the sandbox's own kAlternateWinstation (see
|
# content level > 4 puts content processes on the sandbox's own
|
||||||
# security/sandbox/win/src/sandboxbroker/sandboxBroker.cpp line 1113-1114:
|
# kAlternateWinstation (see security/sandbox/win/src/sandboxbroker/
|
||||||
|
# sandboxBroker.cpp line 1113-1114:
|
||||||
# `if (aSandboxLevel > 4) config->SetDesktop(kAlternateWinstation)`).
|
# `if (aSandboxLevel > 4) config->SetDesktop(kAlternateWinstation)`).
|
||||||
# Combined with our CreateDesktop alt-desktop, that puts browser process
|
# Combined with our CreateDesktop alt-desktop, that puts browser process
|
||||||
# and content processes on DIFFERENT desktops. Cross-process navigation
|
# and content processes on DIFFERENT desktops. Cross-process navigation
|
||||||
# (Adobe AppMeasurement → new origin → new content process on a new
|
# then fails window parenting between parent and child, the content
|
||||||
# desktop) then fails window parenting between parent and child → content
|
|
||||||
# process exits cleanly (exitCode=0, signal=null) and Playwright fires
|
# process exits cleanly (exitCode=0, signal=null) and Playwright fires
|
||||||
# page.on('crash') ~10s after page load. Lowering content sandbox to 4
|
# page.on('crash') ~10s after page load. Lowering content sandbox to 4
|
||||||
# keeps content processes on the same desktop as the browser process,
|
# keeps content processes on the same desktop as the browser process,
|
||||||
# which is what we want here (and is still tight enough — level 4
|
# which is what we want here (still tight enough — level 4 blocks
|
||||||
# blocks file/registry write, network calls, hardware access).
|
# file/registry write, network calls, hardware access).
|
||||||
"security.sandbox.content.level": 4,
|
"security.sandbox.content.level": 4,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,12 @@
|
||||||
|
import os
|
||||||
import random
|
import random
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from invisible_playwright._fpforge import generate_profile
|
from invisible_playwright._fpforge import generate_profile
|
||||||
|
from invisible_playwright.constants import BINARY_ENTRY_REL
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -15,3 +19,36 @@ def deterministic_rng():
|
||||||
def sample_profile():
|
def sample_profile():
|
||||||
"""A Profile generated from seed=42 for reuse across tests."""
|
"""A Profile generated from seed=42 for reuse across tests."""
|
||||||
return generate_profile(seed=42)
|
return generate_profile(seed=42)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def firefox_binary():
    """Session-scoped path to the Firefox binary used by E2E tests.

    Shared by every E2E test so they all honour the same lookup order:

      1. ``INVPW_BINARY_PATH`` env var, if set. The test is skipped when the
         path it names does not exist.
      2. The cached binary at
         ``cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]``.
      3. Otherwise skip. The fixture never downloads anything itself.

    Returns:
        The binary path as a string.
    """
    override = os.environ.get("INVPW_BINARY_PATH")
    if override:
        if not Path(override).exists():
            pytest.skip(f"INVPW_BINARY_PATH={override!r} does not exist")
        return override

    platform_key = sys.platform
    if platform_key not in BINARY_ENTRY_REL:
        pytest.skip(f"unsupported platform: {platform_key}")

    from invisible_playwright.download import cache_dir_for_version

    cached_entry = cache_dir_for_version() / BINARY_ENTRY_REL[platform_key]
    if cached_entry.exists():
        return str(cached_entry)
    pytest.skip(
        "patched Firefox binary not cached and INVPW_BINARY_PATH unset; "
        "set INVPW_BINARY_PATH=<firefox binary> or run `invisible-playwright fetch`"
    )
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,26 @@ from invisible_playwright.constants import (
|
||||||
BINARY_BASENAME,
|
BINARY_BASENAME,
|
||||||
BINARY_ENTRY_REL,
|
BINARY_ENTRY_REL,
|
||||||
BINARY_VERSION,
|
BINARY_VERSION,
|
||||||
|
BROKEN_VERSIONS,
|
||||||
FIREFOX_UPSTREAM_VERSION,
|
FIREFOX_UPSTREAM_VERSION,
|
||||||
RELEASE_URL_TEMPLATE,
|
RELEASE_URL_TEMPLATE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_broken_versions_excludes_current():
    """The default ``BINARY_VERSION`` must not be listed in ``BROKEN_VERSIONS``.

    If it were, every default ``ensure_binary()`` call would raise.
    """
    current_is_flagged = BINARY_VERSION in BROKEN_VERSIONS
    assert not current_is_flagged
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_firefox_8_is_marked_broken():
    """``firefox-8`` must stay listed in ``BROKEN_VERSIONS``.

    ``ensure_binary`` refuses versions in that set, so keeping the flag stops
    a stale cached copy from being handed to a user.
    """
    is_flagged = "firefox-8" in BROKEN_VERSIONS
    assert is_flagged
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_binary_version_format():
|
def test_binary_version_format():
|
||||||
assert BINARY_VERSION.startswith("firefox-")
|
assert BINARY_VERSION.startswith("firefox-")
|
||||||
|
|
@ -31,9 +46,16 @@ def test_archive_name_linux():
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_archive_name_unsupported_raises():
|
def test_archive_name_macos_arm64():
|
||||||
|
name = ARCHIVE_NAME("darwin", "arm64")
|
||||||
|
assert name.endswith(".tar.gz")
|
||||||
|
assert "macos-arm64" in name
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_archive_name_truly_unsupported_raises():
|
||||||
with pytest.raises(NotImplementedError):
|
with pytest.raises(NotImplementedError):
|
||||||
ARCHIVE_NAME("darwin", "arm64")
|
ARCHIVE_NAME("plan9", "x86_64")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
|
|
@ -77,20 +99,18 @@ def test_archive_name_rejects_unsupported_arches(machine):
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@pytest.mark.parametrize("machine", ["arm64", "aarch64"])
|
@pytest.mark.parametrize("machine", ["arm64", "aarch64"])
|
||||||
def test_archive_name_arm64_not_yet_supported(machine):
|
def test_archive_name_arm64_supported(machine):
|
||||||
"""ARM64 is a frequent request (issue #6). Until binaries exist for it,
|
"""ARM64 is shipped now (issue #6): both Linux aarch64 and macOS arm64.
|
||||||
ARCHIVE_NAME should hard-fail rather than silently degrade. If this test
|
ARCHIVE_NAME must map both machine spellings to the canonical -arm64 asset."""
|
||||||
starts failing because someone shipped ARM64 builds, replace it with the
|
assert ARCHIVE_NAME("linux", machine) == "firefox-150.0.1-stealth-linux-arm64.tar.gz"
|
||||||
positive case."""
|
assert ARCHIVE_NAME("darwin", machine) == "firefox-150.0.1-stealth-macos-arm64.tar.gz"
|
||||||
with pytest.raises(NotImplementedError):
|
|
||||||
ARCHIVE_NAME("linux", machine)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
@pytest.mark.parametrize("platform_key", ["darwin", "freebsd", "cygwin", "openbsd"])
|
@pytest.mark.parametrize("platform_key", ["freebsd", "cygwin", "openbsd"])
|
||||||
def test_archive_name_rejects_unsupported_platforms(platform_key):
|
def test_archive_name_rejects_unsupported_platforms(platform_key):
|
||||||
"""Same logic — non-Linux/non-Windows platforms must raise, not silently
|
"""win32/linux/darwin are supported; other platforms must raise, not
|
||||||
pick one of the two."""
|
silently pick one of the three."""
|
||||||
with pytest.raises(NotImplementedError, match=platform_key):
|
with pytest.raises(NotImplementedError, match=platform_key):
|
||||||
ARCHIVE_NAME(platform_key, "x86_64")
|
ARCHIVE_NAME(platform_key, "x86_64")
|
||||||
|
|
||||||
|
|
@ -104,7 +124,7 @@ def test_archive_name_rejects_unsupported_platforms(platform_key):
|
||||||
def test_binary_entry_rel_covers_every_supported_platform():
|
def test_binary_entry_rel_covers_every_supported_platform():
|
||||||
"""If ARCHIVE_NAME accepts a platform key, BINARY_ENTRY_REL must declare
|
"""If ARCHIVE_NAME accepts a platform key, BINARY_ENTRY_REL must declare
|
||||||
where the executable lives inside the archive for it."""
|
where the executable lives inside the archive for it."""
|
||||||
for plat in ["win32", "linux"]:
|
for plat in ["win32", "linux", "darwin"]:
|
||||||
ARCHIVE_NAME(plat, "x86_64") # must not raise
|
ARCHIVE_NAME(plat, "x86_64") # must not raise
|
||||||
assert plat in BINARY_ENTRY_REL, (
|
assert plat in BINARY_ENTRY_REL, (
|
||||||
f"ARCHIVE_NAME accepts {plat!r} but BINARY_ENTRY_REL has no entry "
|
f"ARCHIVE_NAME accepts {plat!r} but BINARY_ENTRY_REL has no entry "
|
||||||
|
|
@ -118,6 +138,7 @@ def test_binary_entry_rel_extension_matches_platform():
|
||||||
assert BINARY_ENTRY_REL["win32"].endswith(".exe")
|
assert BINARY_ENTRY_REL["win32"].endswith(".exe")
|
||||||
assert not BINARY_ENTRY_REL["linux"].endswith(".exe")
|
assert not BINARY_ENTRY_REL["linux"].endswith(".exe")
|
||||||
assert BINARY_ENTRY_REL["linux"] == "firefox"
|
assert BINARY_ENTRY_REL["linux"] == "firefox"
|
||||||
|
assert BINARY_ENTRY_REL["darwin"].endswith(".app/Contents/MacOS/firefox")
|
||||||
|
|
||||||
|
|
||||||
# ---- RELEASE_URL_TEMPLATE shape ------------------------------------------- #
|
# ---- RELEASE_URL_TEMPLATE shape ------------------------------------------- #
|
||||||
|
|
|
||||||
278
tests/test_cross_origin_iframe.py
Normal file
278
tests/test_cross_origin_iframe.py
Normal file
|
|
@ -0,0 +1,278 @@
|
||||||
|
"""Regression tests for cross-origin / cross-process iframe interaction.
|
||||||
|
|
||||||
|
History: wrapper repo issue #20 reported that a third-party cookie
|
||||||
|
consent iframe was completely unreachable from Playwright in 0.1.7 —
|
||||||
|
``element_handle.content_frame()`` returned ``None``, ``frame.evaluate()``
|
||||||
|
threw cross-origin SOP errors, and ``frame_locator().click()`` timed
|
||||||
|
out.
|
||||||
|
|
||||||
|
Root cause was a missing pref. FF150 ships with
|
||||||
|
``fission.webContentIsolationStrategy=1`` (IsolateEverything), which
|
||||||
|
site-isolates cross-origin iframes into separate webIsolated content
|
||||||
|
processes even when ``fission.autostart=False``. The Juggler code paths
|
||||||
|
inherited from the FF146 era assume same-process iframes. The wrapper's
|
||||||
|
``_BASELINE`` now pins the pref to 0 (IsolateNothing).
|
||||||
|
|
||||||
|
These tests exist so a future Firefox upgrade or a fingerprint A/B
|
||||||
|
that flips this pref by accident cannot ship without a red CI signal.
|
||||||
|
|
||||||
|
Layers:
|
||||||
|
* ``unit`` — ``_BASELINE`` contains the pref with the right value. No browser.
|
||||||
|
* ``e2e`` — launch the real binary against a LOCAL HTTP harness on
|
||||||
|
``127.0.0.1`` (two ports = two SOP origins) and verify the
|
||||||
|
four protocol operations that regressed: frame URL tracking,
|
||||||
|
``handle.content_frame()``, ``frame.evaluate()``, and
|
||||||
|
``frame_locator(...).locator(...)`` element resolution.
|
||||||
|
|
||||||
|
The e2e tests run entirely offline. They never call out to a real site;
|
||||||
|
the cross-origin shape is reproduced with two local HTTP servers on
|
||||||
|
random free ports.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import socket
|
||||||
|
import threading
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright._fpforge import generate_profile
|
||||||
|
from invisible_playwright.prefs import _BASELINE, translate_profile_to_prefs
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Unit layer — fast, no browser, runs on every CI
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_baseline_pins_web_content_isolation_strategy_to_zero():
    """Regression sentinel for the isolation-strategy pref.

    ``_BASELINE`` must carry ``fission.webContentIsolationStrategy == 0``
    (IsolateNothing). The failure message explains the symptoms to expect
    if the value is changed.
    """
    strategy = _BASELINE["fission.webContentIsolationStrategy"]
    failure_hint = (
        "fission.webContentIsolationStrategy must be 0 (IsolateNothing). "
        "If you bumped it for an A/B, cross-origin iframes will appear "
        "in page.frames with empty URLs and content_frame() will return "
        "None — see the changelog entry that introduced this test."
    )
    assert strategy == 0, failure_hint
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_baseline_keeps_fission_autostart_off():
    """The companion Fission prefs in ``_BASELINE`` keep their pinned values."""
    must_be_false = ("fission.autostart", "fission.autostart.session")
    for pref in must_be_false:
        assert _BASELINE[pref] is False
    assert _BASELINE["dom.ipc.processCount.webIsolated"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_translated_profile_propagates_isolation_strategy():
    """The pinned strategy must appear in the prefs built for a real profile,
    not only in the ``_BASELINE`` dict."""
    translated = translate_profile_to_prefs(generate_profile(seed=42))
    assert translated["fission.webContentIsolationStrategy"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_extra_prefs_override_can_break_isolation_only_explicitly():
    """The strategy is 0 by default and changes only via an explicit
    ``extra_prefs`` override supplied by the caller."""
    pref_key = "fission.webContentIsolationStrategy"
    profile = generate_profile(seed=42)

    assert translate_profile_to_prefs(profile)[pref_key] == 0

    overridden = translate_profile_to_prefs(profile, extra_prefs={pref_key: 1})
    assert overridden[pref_key] == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# E2E layer — needs cached binary + bind to localhost ports
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _free_port() -> int:
|
||||||
|
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
s.bind(("127.0.0.1", 0))
|
||||||
|
port = s.getsockname()[1]
|
||||||
|
s.close()
|
||||||
|
return port
|
||||||
|
|
||||||
|
|
||||||
|
class _SilentHandler(BaseHTTPRequestHandler):
|
||||||
|
"""Suppress per-request access logging so pytest output stays clean."""
|
||||||
|
PAYLOAD = b"" # set per-instance via subclassing
|
||||||
|
|
||||||
|
def log_message(self, *_a):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/html; charset=utf-8")
|
||||||
|
self.send_header("Cache-Control", "no-store")
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(self.PAYLOAD)
|
||||||
|
|
||||||
|
|
||||||
|
def _serve(payload: bytes, port: int) -> HTTPServer:
    """Launch a background HTTP server on 127.0.0.1:``port``.

    Every GET is answered with ``payload``. The server runs in a daemon
    thread; the caller receives the server object back.
    """
    # One-off handler subclass that carries the payload as a class attribute.
    payload_handler = type("_H", (_SilentHandler,), {"PAYLOAD": payload})
    server = HTTPServer(("127.0.0.1", port), payload_handler)
    worker = threading.Thread(target=server.serve_forever, daemon=True)
    worker.start()
    return server
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def cross_origin_harness():
    """Spin up TWO local HTTP servers on different localhost ports.

    Two ports = two distinct origins under SOP (same host, different port
    → different origin). The parent page on port A embeds three iframes
    (plain, sandboxed, titled) whose src points at port B. Same
    cross-origin browsing-context shape as a parent-page-plus-third-party
    -iframe layout, fully offline.

    Yields:
        dict with ``parent_url`` (URL of the parent page) and
        ``child_origin`` (scheme://host:port of the iframe server).

    Teardown stops every server that was started AND closes its listening
    socket, including the case where the second server failed to start.
    """
    pa, pb = _free_port(), _free_port()
    parent_html = f"""<!doctype html><html><head><title>parent</title></head><body>
<h1>parent</h1>
<iframe id="ifr_plain" src="http://127.0.0.1:{pb}/child" width="300" height="120"></iframe>
<iframe id="ifr_sandbox" src="http://127.0.0.1:{pb}/child" width="300" height="120"
        sandbox="allow-scripts allow-same-origin"></iframe>
<iframe id="ifr_titled" src="http://127.0.0.1:{pb}/child" width="300" height="120"
        title="cross-origin titled iframe"></iframe>
</body></html>""".encode("utf-8")
    child_html = b"""<!doctype html><html><body>
<button id="ok">confirm</button>
<button class="btn-primary">primary</button>
<script>document.getElementById('ok').addEventListener('click', () => document.title = 'clicked')</script>
</body></html>"""
    started = []
    try:
        # Start inside the try block so a failure on the second server
        # still tears down the first one.
        started.append(_serve(parent_html, pa))
        started.append(_serve(child_html, pb))
        yield {"parent_url": f"http://127.0.0.1:{pa}/", "child_origin": f"http://127.0.0.1:{pb}"}
    finally:
        for srv in started:
            srv.shutdown()      # stop the serve_forever loop
            srv.server_close()  # release the listening socket
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_cross_origin_iframe_url_appears_in_page_frames(firefox_binary, cross_origin_harness):
    """The cross-origin iframe has to show up in ``page.frames`` with its real URL.

    Before the pref fix the URL came back as '' because the iframe's
    navigation observer fired in a different content process than the
    one the parent's FrameTree was registered in.
    """
    from invisible_playwright import InvisiblePlaywright

    child_origin = cross_origin_harness["child_origin"]
    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        page = browser.new_context().new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)
        page.wait_for_timeout(500)

        urls = [frame.url for frame in page.frames]
        # A frame URL may be empty/None; treat that as "no match".
        matching = [u for u in urls if child_origin in (u or "")]
        assert matching, (
            f"no frame had the child origin in its URL; page.frames urls = {urls!r}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_cross_origin_iframe_content_frame_resolves(firefox_binary, cross_origin_harness):
    """``handle.content_frame()`` must yield a Frame (never None) for each
    cross-origin iframe variant under test: plain, sandboxed and titled.
    """
    from invisible_playwright import InvisiblePlaywright

    iframe_selectors = ("iframe#ifr_plain", "iframe#ifr_sandbox", "iframe#ifr_titled")
    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        page = browser.new_context().new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)
        page.wait_for_timeout(500)

        for sel in iframe_selectors:
            element = page.query_selector(sel)
            assert element is not None, f"{sel!r} not found in DOM"
            cf = element.content_frame()
            assert cf is not None, f"{sel!r}: content_frame() returned None"
            # The resolved frame must point at the child server's origin.
            url_ok = cross_origin_harness["child_origin"] in (cf.url or "")
            assert url_ok, (
                f"{sel!r}: content_frame().url = {cf.url!r}, "
                f"expected child origin {cross_origin_harness['child_origin']!r}"
            )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_cross_origin_iframe_evaluate_returns_real_values(firefox_binary, cross_origin_harness):
    """``frame.evaluate()`` has to work inside the cross-origin iframe.

    Pre-fix, every evaluate failed with a cross-origin SOP error because
    the iframe ended up with a stale/wrong execution context.
    """
    from invisible_playwright import InvisiblePlaywright

    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        page = browser.new_context().new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)
        page.wait_for_timeout(500)

        iframe_handle = page.query_selector("iframe#ifr_plain")
        cf = iframe_handle.content_frame()
        assert cf is not None

        # location.href must be the child server's URL.
        assert cross_origin_harness["child_origin"] in cf.evaluate("() => location.href")
        # document.title only needs to come back as a string.
        assert isinstance(cf.evaluate("() => document.title"), str)
        # The child page defines exactly two buttons.
        assert cf.evaluate("() => document.querySelectorAll('button').length") == 2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_cross_origin_iframe_frame_locator_resolves_button(firefox_binary, cross_origin_harness):
    """``frame_locator(...).locator(...)`` must reach the button inside the iframe.

    ``Locator.count()`` is a point-in-time snapshot and does not wait, so
    each locator is first awaited until attached. Otherwise the count can
    race the iframe's document load and report 0 on a slow runner.
    """
    from invisible_playwright import InvisiblePlaywright

    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        ctx = browser.new_context()
        page = ctx.new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)

        child = page.frame_locator("iframe#ifr_plain")
        for selector in ("button#ok", "button.btn-primary"):
            target = child.locator(selector)
            # ``.first`` keeps the wait non-strict, so a duplicate match is
            # reported by the count assertion below rather than by a
            # strict-mode error here.
            target.first.wait_for(state="attached", timeout=10_000)
            cnt = target.count()
            assert cnt == 1, f"locator({selector!r}) found {cnt} elements (expected 1)"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_cross_origin_iframe_dispatch_event_click_works(firefox_binary, cross_origin_harness):
    """End-to-end interaction via ``dispatch_event`` must succeed.

    Plain ``.click()`` can trip Playwright's actionability heuristic on
    some third-party UIs (same on vanilla Playwright Firefox — not our
    regression), but ``dispatch_event('click')`` always works once the
    iframe is reachable.

    The child page's click handler sets ``document.title`` to 'clicked';
    that value is read back through the iframe's content frame.
    """
    from invisible_playwright import InvisiblePlaywright

    with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser:
        ctx = browser.new_context()
        page = ctx.new_page()
        page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000)
        page.wait_for_selector("iframe#ifr_plain", timeout=10_000)

        page.frame_locator("iframe#ifr_plain").locator("button#ok").dispatch_event(
            "click", timeout=4_000
        )
        # Fail with a readable message instead of an AttributeError on None
        # when the iframe element or its frame cannot be resolved.
        handle = page.query_selector("iframe#ifr_plain")
        assert handle is not None, "iframe#ifr_plain not found in DOM"
        cf = handle.content_frame()
        assert cf is not None, "iframe#ifr_plain: content_frame() returned None"
        assert cf.evaluate("() => document.title") == "clicked"
|
||||||
171
tests/test_detectors_e2e.py
Normal file
171
tests/test_detectors_e2e.py
Normal file
|
|
@ -0,0 +1,171 @@
|
||||||
|
"""E2E: run the REAL open-source detectors against the patched binary, on CI.
|
||||||
|
|
||||||
|
Instead of our own hand-rolled signal checks, this loads the actual detection
|
||||||
|
libraries and uses their FULL API surface:
|
||||||
|
|
||||||
|
* BotD (@fingerprintjs/botd, MIT) — the client-side bot detector that
|
||||||
|
FingerprintJS Pro itself uses. We assert the aggregate verdict
|
||||||
|
(``detect().bot == False``) AND every one of its ~18 individual detectors
|
||||||
|
(``getDetections()``) returns ``bot == False``. The per-detector view is
|
||||||
|
why we could delete our hand-rolled ``test_botd_*`` mirrors — the real
|
||||||
|
library now covers each detector, with the same granularity.
|
||||||
|
* FingerprintJS open-source (MIT) — ``get()`` must return a ``visitorId``
|
||||||
|
that is STABLE across two fresh launches with the same seed (an
|
||||||
|
over-randomized spoof drifts), and a RICH component set (the fingerprint
|
||||||
|
surface is real, not a stub).
|
||||||
|
|
||||||
|
Everything is hermetic: the libraries are vendored (tests/vendor/) and served
|
||||||
|
from a localhost HTTP server — no external CDN call (Firefox tracking-protection
|
||||||
|
blocks the CDN anyway) and no IP/network dependency. Runs identically on a dev
|
||||||
|
box and on a GitHub runner.
|
||||||
|
|
||||||
|
NOT covered: FingerprintJS *Pro* (commercial, server-side, IP/residential
|
||||||
|
analysis) — can't be self-hosted, stays the local realness gate.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import http.server
|
||||||
|
import socketserver
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright import InvisiblePlaywright
|
||||||
|
|
||||||
|
_VENDOR = Path(__file__).parent / "vendor"
|
||||||
|
_BOTD = "botd-2.0.0.esm.js"
|
||||||
|
_FPJS = "fingerprintjs-5.2.0.umd.min.js"
|
||||||
|
|
||||||
|
_PAGE = f"""<!doctype html><html><head><meta charset="utf-8">
|
||||||
|
<title>detectors</title>
|
||||||
|
<script src="/{_FPJS}"></script>
|
||||||
|
</head><body><h1 id="state">loading</h1>
|
||||||
|
<script type="module">
|
||||||
|
window.__botd = null; window.__fp = null; window.__err = "";
|
||||||
|
(async () => {{
|
||||||
|
try {{
|
||||||
|
const Botd = await import("/{_BOTD}");
|
||||||
|
const botd = await Botd.load(); // load() collects internally
|
||||||
|
const verdict = botd.detect(); // {{bot:false}} | {{bot:true,botKind}}
|
||||||
|
const raw = botd.getDetections() || {{}}; // per-detector verdicts
|
||||||
|
const detections = {{}};
|
||||||
|
for (const k in raw) detections[k] = {{ bot: raw[k].bot, botKind: raw[k].botKind || null }};
|
||||||
|
window.__botd = {{ bot: verdict.bot, botKind: verdict.botKind || null, detections }};
|
||||||
|
}} catch (e) {{ window.__err += " botd:" + e; }}
|
||||||
|
try {{
|
||||||
|
const fp = await FingerprintJS.load();
|
||||||
|
const r = await fp.get();
|
||||||
|
const keys = Object.keys(r.components || {{}});
|
||||||
|
const errored = keys.filter(k => r.components[k] && "error" in r.components[k]);
|
||||||
|
window.__fp = {{ visitorId: r.visitorId, componentKeys: keys, erroredComponents: errored }};
|
||||||
|
}} catch (e) {{ window.__err += " fp:" + e; }}
|
||||||
|
document.getElementById("state").textContent = "done";
|
||||||
|
}})();
|
||||||
|
</script></body></html>"""
|
||||||
|
|
||||||
|
|
||||||
|
class _DetectorSite:
    """Localhost server: `/` → the page; `/<lib>` → the vendored bundle.

    The server binds 127.0.0.1 on a kernel-chosen port and runs in a
    daemon thread. Requests are handled in per-connection daemon threads
    so a connection that is opened but never sends a request cannot
    block the other requests.
    """

    def __init__(self):
        page = _PAGE.encode()
        vendor = _VENDOR

        class H(http.server.BaseHTTPRequestHandler):
            def do_GET(self):  # noqa: N802
                if self.path == "/" or self.path.startswith("/?"):
                    body, ctype = page, "text/html; charset=utf-8"
                else:
                    # Ignore any query string / fragment and keep only the
                    # basename, so lookups stay inside the vendor directory.
                    resource = self.path.split("?", 1)[0].split("#", 1)[0]
                    f = vendor / Path(resource.lstrip("/")).name
                    if not f.is_file():
                        self.send_error(404)
                        return
                    body = f.read_bytes()
                    ctype = "text/javascript; charset=utf-8"
                self.send_response(200)
                self.send_header("Content-Type", ctype)
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)

            def log_message(self, *a):
                # Keep pytest output free of per-request access lines.
                pass

        self._srv = socketserver.ThreadingTCPServer(("127.0.0.1", 0), H)
        # Handler threads must not keep the interpreter alive or make
        # server_close() wait for them.
        self._srv.daemon_threads = True
        self.port = self._srv.server_address[1]
        threading.Thread(target=self._srv.serve_forever, daemon=True).start()

    @property
    def url(self):
        """Base URL of the served page."""
        return f"http://127.0.0.1:{self.port}/"

    def close(self):
        """Stop the serve loop and release the listening socket."""
        self._srv.shutdown()
        self._srv.server_close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
def detector_site():
    """Module-scoped detector server; closed once the module's tests finish."""
    site = _DetectorSite()
    yield site
    site.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_detectors(firefox_binary, url):
    """Open ``url`` in a fresh browser launch and collect the detector output.

    Waits until the page's ``#state`` element reads 'done', then reads the
    three globals the page populates.

    Returns:
        Tuple ``(botd, fp, err)`` taken from ``window.__botd``,
        ``window.__fp`` and ``window.__err``.
    """
    finished = "() => document.getElementById('state').textContent === 'done'"
    readouts = ("() => window.__botd", "() => window.__fp", "() => window.__err")
    with InvisiblePlaywright(seed=42, binary_path=firefox_binary) as browser:
        page = browser.new_page()
        page.goto(url, wait_until="load", timeout=45000)
        page.wait_for_function(finished, timeout=45000)
        # Evaluated in order: botd, fp, err.
        botd, fp, err = [page.evaluate(expr) for expr in readouts]
    return botd, fp, err
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_botd_no_detector_flags_automation(firefox_binary, detector_site):
    """The real BotD must stay silent on this build: the aggregate verdict
    AND each individual detector
    (webDriver/userAgent/appVersion/plugins/process/...).
    """
    botd, _fp, err = _run_detectors(firefox_binary, detector_site.url)
    assert botd is not None, f"BotD produced no result (err:{err!r})"

    # Aggregate verdict must be the literal False, not merely falsy.
    aggregate = botd.get("bot")
    assert aggregate is False, (
        f"BotD aggregate flagged a bot: botKind={botd.get('botKind')!r}"
    )

    detections = botd.get("detections") or {}
    assert detections, f"BotD getDetections() returned nothing (err:{err!r})"

    # Collect every detector that reported bot=True, keyed by detector name.
    flagged = {}
    for name, verdict in detections.items():
        if verdict.get("bot"):
            flagged[name] = verdict.get("botKind")
    assert not flagged, f"BotD individual detectors flagged automation: {flagged}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_fingerprintjs_visitorid_stable_across_launches(firefox_binary, detector_site):
    """Two fresh launches with the same seed must produce the same,
    non-empty FingerprintJS visitorId — a real browser is stable; an
    over-randomized spoof drifts (and a drifting fingerprint is itself a
    bot tell).
    """
    site_url = detector_site.url
    # Both launches happen before any assertion runs.
    first_run = _run_detectors(firefox_binary, site_url)
    second_run = _run_detectors(firefox_binary, site_url)
    fp1, err1 = first_run[1], first_run[2]
    fp2, err2 = second_run[1], second_run[2]

    assert fp1 and fp1.get("visitorId"), f"no visitorId on run 1 (err:{err1!r})"
    assert fp2 and fp2.get("visitorId"), f"no visitorId on run 2 (err:{err2!r})"

    ids_match = fp1["visitorId"] == fp2["visitorId"]
    assert ids_match, (
        f"FingerprintJS visitorId drifted across launches: "
        f"{fp1['visitorId']!r} != {fp2['visitorId']!r} (per-session entropy = bot tell)"
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_fingerprintjs_collects_rich_fingerprint(firefox_binary, detector_site):
    """FingerprintJS has to gather a RICH component surface (a real browser
    exposes many signals; a stripped/blocked surface is itself suspicious).

    Errored components are deliberately not asserted to be zero (some are
    legitimately unsupported per browser); only the size of the surface
    and the presence of a computed id are checked.
    """
    _botd, fp, err = _run_detectors(firefox_binary, detector_site.url)
    assert fp and fp.get("visitorId"), f"FingerprintJS produced no id (err:{err!r})"

    keys = fp.get("componentKeys") or []
    collected = len(keys)
    assert collected >= 15, (
        f"FingerprintJS collected only {len(keys)} components — surface too thin "
        f"(suppressed signals are themselves a tell): {keys}"
    )
|
||||||
|
|
@ -418,7 +418,7 @@ def test_github_token_none_when_unset(monkeypatch):
|
||||||
# Bonus coverage: unsupported platform raises NotImplementedError before any HTTP
|
# Bonus coverage: unsupported platform raises NotImplementedError before any HTTP
|
||||||
@pytest.mark.unit
|
@pytest.mark.unit
|
||||||
def test_ensure_binary_unsupported_platform_raises(monkeypatch):
|
def test_ensure_binary_unsupported_platform_raises(monkeypatch):
|
||||||
monkeypatch.setattr("sys.platform", "darwin")
|
monkeypatch.setattr("sys.platform", "freebsd") # win32/linux/darwin are supported
|
||||||
import platform
|
import platform
|
||||||
monkeypatch.setattr(platform, "machine", lambda: "AMD64")
|
monkeypatch.setattr(platform, "machine", lambda: "AMD64")
|
||||||
with pytest.raises(NotImplementedError, match="unsupported platform"):
|
with pytest.raises(NotImplementedError, match="unsupported platform"):
|
||||||
|
|
@ -832,3 +832,11 @@ def test_parse_owner_repo_handles_repos_with_dashes_and_underscores():
|
||||||
)
|
)
|
||||||
assert owner == "my-org"
|
assert owner == "my-org"
|
||||||
assert repo == "my_cool.repo"
|
assert repo == "my_cool.repo"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_ensure_binary_refuses_known_broken_version():
|
||||||
|
"""A known-broken release (firefox-8, no juggler) must be refused with a
|
||||||
|
clear error BEFORE any download — never silently handed to the user."""
|
||||||
|
with pytest.raises(RuntimeError, match="known-broken"):
|
||||||
|
ensure_binary("firefox-8")
|
||||||
|
|
|
||||||
|
|
@ -8,33 +8,9 @@ handling) do not need a binary and always run.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from invisible_playwright import InvisiblePlaywright
|
from invisible_playwright import InvisiblePlaywright
|
||||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def firefox_binary():
|
|
||||||
"""Locate the patched Firefox binary or skip the calling test.
|
|
||||||
|
|
||||||
We do NOT trigger a network download here: ``ensure_binary`` would
|
|
||||||
pull a multi-hundred-megabyte archive from a private release,
|
|
||||||
which is not appropriate inside a unit/E2E test run. Instead we
|
|
||||||
look for an already-cached binary; if missing we skip.
|
|
||||||
"""
|
|
||||||
if sys.platform not in BINARY_ENTRY_REL:
|
|
||||||
pytest.skip(f"unsupported platform: {sys.platform}")
|
|
||||||
from invisible_playwright.download import cache_dir_for_version
|
|
||||||
entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
|
|
||||||
if not entry.exists():
|
|
||||||
pytest.skip(
|
|
||||||
"patched Firefox binary not cached; run `invisible-playwright fetch` "
|
|
||||||
"to enable E2E tests"
|
|
||||||
)
|
|
||||||
return str(entry)
|
|
||||||
|
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────────
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,9 @@ Run only this file:
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from invisible_playwright import InvisiblePlaywright
|
from invisible_playwright import InvisiblePlaywright
|
||||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
|
||||||
|
|
||||||
|
|
||||||
PIN = {
|
PIN = {
|
||||||
|
|
@ -45,29 +42,6 @@ PIN = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def firefox_binary():
|
|
||||||
"""See test_fingerprint_surface.firefox_binary for the lookup chain."""
|
|
||||||
import os
|
|
||||||
env_path = os.environ.get("INVPW_BINARY_PATH")
|
|
||||||
if env_path:
|
|
||||||
from pathlib import Path
|
|
||||||
if Path(env_path).exists():
|
|
||||||
return env_path
|
|
||||||
pytest.skip(f"INVPW_BINARY_PATH={env_path!r} does not exist")
|
|
||||||
if sys.platform not in BINARY_ENTRY_REL:
|
|
||||||
pytest.skip(f"unsupported platform: {sys.platform}")
|
|
||||||
from invisible_playwright.download import cache_dir_for_version
|
|
||||||
entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
|
|
||||||
if not entry.exists():
|
|
||||||
pytest.skip(
|
|
||||||
"patched Firefox not cached; run "
|
|
||||||
"`python -m invisible_playwright fetch` first, or set "
|
|
||||||
"INVPW_BINARY_PATH to a local build"
|
|
||||||
)
|
|
||||||
return str(entry)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def page(firefox_binary):
|
def page(firefox_binary):
|
||||||
with InvisiblePlaywright(
|
with InvisiblePlaywright(
|
||||||
|
|
@ -306,17 +280,6 @@ def test_navigator_oscpu_matches_userAgent(page):
|
||||||
assert "Mac" in oscpu
|
assert "Mac" in oscpu
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_userAgent_contains_appVersion_chromium_only(page):
|
|
||||||
"""Chromium invariant: UA contains appVersion. Firefox uses a short
|
|
||||||
appVersion form so the check is gated on `'chrome' in window`."""
|
|
||||||
if not _ev(page, "'chrome' in window"):
|
|
||||||
pytest.skip("Chromium-only invariant")
|
|
||||||
ua = _ev(page, "navigator.userAgent")
|
|
||||||
av = _ev(page, "navigator.appVersion")
|
|
||||||
assert av in ua
|
|
||||||
|
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
# 5. Native function self-toString (creepjs/src/lies/index.ts hasKnownToString)
|
# 5. Native function self-toString (creepjs/src/lies/index.ts hasKnownToString)
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|
|
||||||
|
|
@ -27,12 +27,10 @@ Run only this file:
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from invisible_playwright import InvisiblePlaywright
|
from invisible_playwright import InvisiblePlaywright
|
||||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
|
||||||
|
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────────
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -53,32 +51,6 @@ PIN = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def firefox_binary():
|
|
||||||
"""Locate the patched Firefox binary. Three lookup paths:
|
|
||||||
1. ``INVPW_BINARY_PATH`` env var (for dev iteration against a local build)
|
|
||||||
2. Cached binary under ``cache_dir_for_version()`` (post-fetch)
|
|
||||||
3. Skip cleanly (no implicit network download)."""
|
|
||||||
import os
|
|
||||||
env_path = os.environ.get("INVPW_BINARY_PATH")
|
|
||||||
if env_path:
|
|
||||||
from pathlib import Path
|
|
||||||
if Path(env_path).exists():
|
|
||||||
return env_path
|
|
||||||
pytest.skip(f"INVPW_BINARY_PATH={env_path!r} does not exist")
|
|
||||||
if sys.platform not in BINARY_ENTRY_REL:
|
|
||||||
pytest.skip(f"unsupported platform: {sys.platform}")
|
|
||||||
from invisible_playwright.download import cache_dir_for_version
|
|
||||||
entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
|
|
||||||
if not entry.exists():
|
|
||||||
pytest.skip(
|
|
||||||
"patched Firefox not cached; run "
|
|
||||||
"`python -m invisible_playwright fetch` first, or set "
|
|
||||||
"INVPW_BINARY_PATH to a local build"
|
|
||||||
)
|
|
||||||
return str(entry)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def page(firefox_binary):
|
def page(firefox_binary):
|
||||||
"""One headless browser shared across the whole module.
|
"""One headless browser shared across the whole module.
|
||||||
|
|
@ -99,170 +71,6 @@ def _ev(page, expr):
|
||||||
return page.evaluate(expr)
|
return page.evaluate(expr)
|
||||||
|
|
||||||
|
|
||||||
# ===========================================================================
|
|
||||||
# BotD detectors (github.com/fingerprintjs/BotD/tree/main/src/detectors)
|
|
||||||
# Each detector becomes one pytest. The failure name maps to the BotKind
|
|
||||||
# constant BotD would emit on the wire.
|
|
||||||
# ===========================================================================
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_webdriver_property_is_falsey(page):
|
|
||||||
"""BotD: navigator.webdriver === true → HeadlessChrome verdict."""
|
|
||||||
assert not _ev(page, "navigator.webdriver"), (
|
|
||||||
"navigator.webdriver is truthy — instant HeadlessChrome verdict"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_app_version_no_headless_token(page):
|
|
||||||
"""BotD detectAppVersion: /headless|electron|slimerjs/i in appVersion."""
|
|
||||||
av = _ev(page, "navigator.appVersion")
|
|
||||||
for token in ("headless", "electron", "slimerjs"):
|
|
||||||
assert not re.search(token, av, re.I), (
|
|
||||||
f"navigator.appVersion contains {token!r}: {av!r}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_user_agent_no_headless_or_selenium_token(page):
|
|
||||||
"""BotD: /headless|selenium|phantom/i in UA."""
|
|
||||||
ua = _ev(page, "navigator.userAgent")
|
|
||||||
for token in ("headless", "selenium", "phantom"):
|
|
||||||
assert not re.search(token, ua, re.I), (
|
|
||||||
f"navigator.userAgent contains {token!r}: {ua!r}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_function_bind_is_function(page):
|
|
||||||
"""BotD detectFunctionBind: missing Function.prototype.bind = PhantomJS."""
|
|
||||||
assert _ev(page, "typeof Function.prototype.bind === 'function'")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_product_sub_is_gecko_value(page):
|
|
||||||
"""BotD detectProductSub: Firefox must return '20100101'; '20030107'
|
|
||||||
on a Firefox UA = Chrome-stub leaked under spoof."""
|
|
||||||
assert _ev(page, "navigator.productSub") == "20100101", (
|
|
||||||
"navigator.productSub must be '20100101' on Firefox"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_no_process_global(page):
|
|
||||||
"""BotD detectProcess: window.process indicates Electron."""
|
|
||||||
assert not _ev(page,
|
|
||||||
"typeof window.process !== 'undefined' && "
|
|
||||||
"window.process.type === 'renderer'"
|
|
||||||
)
|
|
||||||
assert not _ev(page,
|
|
||||||
"typeof window.process !== 'undefined' && "
|
|
||||||
"window.process.versions != null && "
|
|
||||||
"typeof window.process.versions.electron !== 'undefined'"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_eval_length_matches_engine(page):
|
|
||||||
"""BotD detectEvalLengthInconsistency: `eval.toString().length` must be
|
|
||||||
37 on Gecko (33 on Chromium, 39 on IE). Mismatch = engine spoof."""
|
|
||||||
assert _ev(page, "eval.toString().length") == 37
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_languages_array_non_empty(page):
|
|
||||||
"""BotD detectLanguagesLengthInconsistency: empty navigator.languages
|
|
||||||
is the classic HeadlessChrome tell."""
|
|
||||||
assert _ev(page, "navigator.languages.length") > 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_plugins_instance_of_PluginArray(page):
|
|
||||||
"""BotD detectPluginsArray: navigator.plugins must be a real PluginArray."""
|
|
||||||
assert _ev(page, "navigator.plugins instanceof PluginArray")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_mime_types_consistent_prototype_chain(page):
|
|
||||||
"""BotD areMimeTypesConsistent: navigator.mimeTypes and each entry
|
|
||||||
must have proper prototype chain. Spoofers using plain arrays fail."""
|
|
||||||
consistent = _ev(page, """() => {
|
|
||||||
if (typeof navigator.mimeTypes === 'undefined' ||
|
|
||||||
typeof MimeTypeArray === 'undefined') return false;
|
|
||||||
let ok = Object.getPrototypeOf(navigator.mimeTypes) === MimeTypeArray.prototype;
|
|
||||||
for (let i = 0; i < navigator.mimeTypes.length; i++) {
|
|
||||||
ok = ok && Object.getPrototypeOf(navigator.mimeTypes[i]) === MimeType.prototype;
|
|
||||||
}
|
|
||||||
return ok;
|
|
||||||
}""")
|
|
||||||
assert consistent, "navigator.mimeTypes prototype chain inconsistent"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
|
|
||||||
def test_botd_no_distinctive_window_props(page):
|
|
||||||
"""BotD checkDistinctiveProperties: scan window for automation globals."""
|
|
||||||
DISTINCTIVE = [
|
|
||||||
"awesomium", "RunPerfTest", "CefSharp", "fmget_targets", "geb",
|
|
||||||
"__nightmare", "nightmare", "__phantomas", "callPhantom", "_phantom",
|
|
||||||
"wdioElectron", "__webdriverFunc", "_WEBDRIVER_ELEM_CACHE",
|
|
||||||
"ChromeDriverw", "domAutomation", "domAutomationController",
|
|
||||||
]
|
|
||||||
leaks = [n for n in DISTINCTIVE
|
|
||||||
if _ev(page, f"typeof window[{n!r}] !== 'undefined'")]
|
|
||||||
assert not leaks, f"Distinctive bot globals leaked: {leaks}"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
def test_botd_no_distinctive_document_props(page):
    """BotD: ``document`` must not expose webdriver/selenium/cdc automation
    property names."""
    doc_props = (
        "__webdriver_evaluate", "__selenium_evaluate",
        "__webdriver_script_function", "__webdriver_script_func",
        "__webdriver_script_fn", "__fxdriver_evaluate",
        "__driver_unwrapped", "__webdriver_unwrapped",
        "__driver_evaluate", "__selenium_unwrapped",
        "__fxdriver_unwrapped",
        "$cdc_asdjflasutopfhvcZLmcf", "$cdc_asdjflasutopfhvcZLmcfl_",
        "$chrome_asyncScriptInfo", "__$webdriverAsyncExecutor",
    )
    leaks = []
    for prop in doc_props:
        if _ev(page, f"typeof document[{prop!r}] !== 'undefined'"):
            leaks.append(prop)
    assert not leaks, f"document carries automation property names: {leaks}"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
def test_botd_document_html_attributes_clean(page):
    """BotD detectDocumentAttributes: an <html> attribute whose name or value
    contains 'selenium' / 'webdriver' / 'driver' yields a Selenium verdict."""
    bot_tokens = ("selenium", "webdriver", "driver")
    attrs = _ev(
        page,
        "Array.from(document.documentElement.attributes).map(a => a.name + '=' + a.value)",
    )
    bad = []
    for attr in attrs:
        lowered = attr.lower()
        if any(token in lowered for token in bot_tokens):
            bad.append(attr)
    assert not bad, f"HTML attributes contain bot tokens: {bad}"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
def test_botd_window_size_nonzero(page):
    """BotD detectWindowSize: a headless browser without a window manager
    reports a 0x0 outer window."""
    width = _ev(page, "window.outerWidth")
    height = _ev(page, "window.outerHeight")
    failure = f"outerWidth/Height = {width}/{height} — headless without window manager"
    assert width > 0 and height > 0, failure
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.e2e
def test_botd_webgl_debug_renderer_info_available(page):
    """BotD detectWebGL: a WebGL context must exist and expose the
    WEBGL_debug_renderer_info extension."""
    probe = """() => {
        const c = document.createElement('canvas');
        const gl = c.getContext('webgl') || c.getContext('experimental-webgl');
        return !!gl && !!gl.getExtension('WEBGL_debug_renderer_info');
    }"""
    extension_present = _ev(page, probe)
    assert extension_present
|
|
||||||
|
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
# sannysoft.com — classic Puppeteer detection harness
|
# sannysoft.com — classic Puppeteer detection harness
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|
|
||||||
288
tests/test_geo.py
Normal file
288
tests/test_geo.py
Normal file
|
|
@ -0,0 +1,288 @@
|
||||||
|
"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution).
|
||||||
|
|
||||||
|
Covers: the precedence policy (resolve_session_timezone), proxy→requests
|
||||||
|
translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked
|
||||||
|
mmdb). No real network or mmdb is touched.
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright import _geo
|
||||||
|
from invisible_playwright._geo import (
|
||||||
|
GeoTimezoneError,
|
||||||
|
_proxies_for_requests,
|
||||||
|
_proxy_is_set,
|
||||||
|
discover_egress_ip,
|
||||||
|
ip_to_timezone,
|
||||||
|
resolve_session_timezone,
|
||||||
|
)
|
||||||
|
|
||||||
|
SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"}
|
||||||
|
HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"}
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# _proxy_is_set
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
@pytest.mark.parametrize(
    "proxy,expected",
    [
        # Nothing usable configured.
        (None, False),
        ({}, False),
        ({"server": ""}, False),
        ({"server": "  "}, False),
        # Explicit "direct" marker, in either case.
        ({"server": "direct://"}, False),
        ({"server": "DIRECT://"}, False),
        # Real proxy servers.
        ({"server": "socks5://h:1"}, True),
        ({"server": "http://h:8080"}, True),
    ],
)
def test_proxy_is_set(proxy, expected):
    """`_proxy_is_set` returns exactly True/False for each proxy shape."""
    verdict = _proxy_is_set(proxy)
    assert verdict is expected
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# _proxies_for_requests — scheme + credential translation
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
def test_proxies_socks5_uses_socks5h_remote_dns():
    """A socks5:// server is translated to socks5h:// with inline creds, and
    the same URL is used for both http and https."""
    mapping = _proxies_for_requests(SOCKS)
    assert mapping["http"] == "socks5h://u:p@gw.example:1080"
    assert mapping["https"] == mapping["http"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_socks4_scheme():
    """A socks4:// server URL passes through with its scheme unchanged."""
    mapping = _proxies_for_requests({"server": "socks4://gw:1080"})
    assert mapping["http"] == "socks4://gw:1080"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_http_and_https_schemes():
    """http:// and https:// proxy servers keep their scheme."""
    http_mapping = _proxies_for_requests(HTTP)
    assert http_mapping["http"] == "http://u:p@gw.example:8080"
    https_mapping = _proxies_for_requests({"server": "https://gw:8443"})
    assert https_mapping["https"] == "https://gw:8443"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_no_scheme_defaults_to_http():
    """A bare host:port server is given the http:// scheme."""
    mapping = _proxies_for_requests({"server": "gw.example:3128"})
    assert mapping["http"] == "http://gw.example:3128"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_credentials_are_url_encoded():
    """Reserved characters in the username/password are percent-encoded."""
    proxy = {"server": "socks5://gw:1080", "username": "user@x", "password": "p:w/d"}
    mapping = _proxies_for_requests(proxy)
    # A raw '@', ':' or '/' inside the credentials would corrupt the parsing
    # of the proxy URL, so each must appear in its %XX form.
    assert "user%40x:p%3Aw%2Fd@gw:1080" in mapping["http"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_proxies_no_credentials_has_no_auth_prefix():
    """Without credentials the URL carries no ``user:pass@`` part."""
    mapping = _proxies_for_requests({"server": "socks5://gw:1080"})
    assert mapping["http"] == "socks5h://gw:1080"
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# discover_egress_ip — mocked requests
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
class _FakeResp:
|
||||||
|
def __init__(self, text, status=200):
|
||||||
|
self.text = text
|
||||||
|
self._status = status
|
||||||
|
|
||||||
|
def raise_for_status(self):
|
||||||
|
if self._status >= 400:
|
||||||
|
raise RuntimeError(f"HTTP {self._status}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_first_endpoint_wins(monkeypatch):
    """The first endpoint returning a valid IP ends the search."""
    requested_urls = []

    def fake_get(url, **kw):
        requested_urls.append(url)
        return _FakeResp("203.0.113.7\n")

    monkeypatch.setattr(_geo.requests, "get", fake_get)
    ip = discover_egress_ip(SOCKS)
    assert ip == "203.0.113.7"
    # Exactly one request: no further endpoint is tried after a success.
    assert len(requested_urls) == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch):
    """A body that is not an IP is skipped; the next endpoint's answer wins."""
    responses = iter(
        [
            _FakeResp("junk-not-an-ip"),
            _FakeResp("198.51.100.42"),
        ]
    )

    def fake_get(url, **kw):
        return next(responses)

    monkeypatch.setattr(_geo.requests, "get", fake_get)
    ip = discover_egress_ip(HTTP)
    assert ip == "198.51.100.42"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_all_fail_raises(monkeypatch):
    """When every request errors out, GeoTimezoneError is raised."""

    def always_refused(url, **kw):
        raise OSError("connection refused")

    monkeypatch.setattr(_geo.requests, "get", always_refused)
    with pytest.raises(GeoTimezoneError):
        discover_egress_ip(SOCKS)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_discover_egress_ip_no_proxy_is_direct(monkeypatch):
    """With proxy=None the request goes out directly: ``requests.get`` must
    be called with ``proxies=None``."""
    captured = {}

    def fake_get(url, **kw):
        # The "MISSING" sentinel tells "kwarg absent" apart from "kwarg is None".
        captured["proxies"] = kw.get("proxies", "MISSING")
        return _FakeResp("192.0.2.55")

    monkeypatch.setattr(_geo.requests, "get", fake_get)
    ip = discover_egress_ip(None)
    assert ip == "192.0.2.55"
    assert captured["proxies"] is None
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# ip_to_timezone — mocked mmdb reader
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
class _FakeReader:
|
||||||
|
def __init__(self, record):
|
||||||
|
self._record = record
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *a):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get(self, ip):
|
||||||
|
return self._record
|
||||||
|
|
||||||
|
|
||||||
|
def _install_fake_maxminddb(monkeypatch, record):
    """Register a fake ``maxminddb`` module in ``sys.modules`` whose
    ``open_database(path)`` returns a ``_FakeReader`` serving ``record``."""

    def open_database(path):
        return _FakeReader(record)

    fake_module = types.ModuleType("maxminddb")
    fake_module.open_database = open_database
    monkeypatch.setitem(sys.modules, "maxminddb", fake_module)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_reads_location_time_zone(monkeypatch):
    """The zone is taken from ``record["location"]["time_zone"]``."""
    record = {"location": {"time_zone": "Europe/Rome"}}
    _install_fake_maxminddb(monkeypatch, record)
    zone = ip_to_timezone("1.2.3.4", "x.mmdb")
    assert zone == "Europe/Rome"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_ip_absent_raises(monkeypatch):
    """A lookup that returns no record raises GeoTimezoneError."""
    no_record = None
    _install_fake_maxminddb(monkeypatch, no_record)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_missing_zone_raises(monkeypatch):
    """A record with a location but no ``time_zone`` raises GeoTimezoneError."""
    record_without_zone = {"location": {}}
    _install_fake_maxminddb(monkeypatch, record_without_zone)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_ip_to_timezone_invalid_iana_raises(monkeypatch):
    """A ``time_zone`` value that is not a valid IANA zone raises
    GeoTimezoneError."""
    bogus_record = {"location": {"time_zone": "Not/AZone"}}
    _install_fake_maxminddb(monkeypatch, bogus_record)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# resolve_session_timezone — the precedence policy
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.fixture
def stub_egress(monkeypatch):
    """Replace egress discovery, IP→zone mapping and mmdb provisioning with
    offline stubs.

    Returns a dict: ``called`` records whether discovery ran and
    ``proxy_arg`` records the proxy it was handed.
    """
    # ensure_geoip_mmdb is patched on the download module itself.
    import invisible_playwright.download as download_module

    record = {"called": False}

    def fake_discover(proxy=None, **kw):
        record["called"] = True
        record["proxy_arg"] = proxy
        return "203.0.113.7"

    def fake_ip_to_timezone(ip, mmdb):
        return "America/New_York"

    def fake_ensure_mmdb(*a, **k):
        return "fake.mmdb"

    monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover)
    monkeypatch.setattr(_geo, "ip_to_timezone", fake_ip_to_timezone)
    monkeypatch.setattr(download_module, "ensure_geoip_mmdb", fake_ensure_mmdb)
    return record
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_explicit_iana_wins(stub_egress):
    """An explicit zone is returned as-is, with or without a proxy, and no
    egress resolution is attempted."""
    for proxy in (SOCKS, None):
        assert resolve_session_timezone("Asia/Tokyo", proxy) == "Asia/Tokyo"
    assert stub_egress["called"] is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_empty_with_proxy_resolves_from_proxy(stub_egress):
    """An empty timezone plus a proxy resolves via that proxy."""
    zone = resolve_session_timezone("", SOCKS)
    assert zone == "America/New_York"
    assert stub_egress["called"] is True
    # Discovery was handed the proxy, i.e. the lookup is routed through it.
    assert stub_egress["proxy_arg"] == SOCKS
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_auto_with_proxy_resolves_from_proxy(stub_egress):
    """"auto" plus a proxy resolves via that proxy."""
    zone = resolve_session_timezone("auto", HTTP)
    assert zone == "America/New_York"
    assert stub_egress["proxy_arg"] == HTTP
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_empty_no_proxy_resolves_from_host(stub_egress):
    """Auto mode always resolves: with no proxy, it uses the host's own
    public IP."""
    zone = resolve_session_timezone("", None)
    assert zone == "America/New_York"
    assert stub_egress["called"] is True
    # proxy_arg None means a direct request with no proxy.
    assert stub_egress["proxy_arg"] is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_auto_no_proxy_resolves_from_host(stub_egress):
    """"auto" with no proxy resolves through a direct lookup."""
    zone = resolve_session_timezone("auto", None)
    assert zone == "America/New_York"
    assert stub_egress["proxy_arg"] is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_direct_proxy_resolves_via_host(stub_egress):
    """``direct://`` is treated as "no proxy": resolution still happens,
    from the host IP."""
    direct = {"server": "direct://"}
    zone = resolve_session_timezone("auto", direct)
    assert zone == "America/New_York"
    assert stub_egress["proxy_arg"] is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_no_proxy_failure_falls_back_to_host(monkeypatch):
    """With no proxy, a failed lookup must not break the launch: the result
    is "" (host timezone)."""

    def failing_discover(proxy=None, **kw):
        raise GeoTimezoneError("offline")

    monkeypatch.setattr(_geo, "discover_egress_ip", failing_discover)
    for requested in ("auto", ""):
        assert resolve_session_timezone(requested, None) == ""
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_resolve_proxy_failure_raises(monkeypatch):
    """With a proxy configured, a failed lookup must raise and never fall
    back silently to the host timezone."""

    def failing_discover(proxy=None, **kw):
        raise GeoTimezoneError("no egress")

    monkeypatch.setattr(_geo, "discover_egress_ip", failing_discover)
    for requested in ("auto", ""):
        with pytest.raises(GeoTimezoneError):
            resolve_session_timezone(requested, SOCKS)
|
||||||
131
tests/test_geoip_update.py
Normal file
131
tests/test_geoip_update.py
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
"""Unit tests for the intelligent geoip mmdb auto-update in `download.py`.
|
||||||
|
|
||||||
|
daijro/geoip-all-in-one rebuilds weekly; `ensure_geoip_mmdb` keeps the cache
|
||||||
|
fresh without a download (or API call) on every launch. These tests mock the
|
||||||
|
cache root, the latest-tag API, and the per-tag download so nothing touches the
|
||||||
|
network.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import invisible_playwright.download as dl
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def cache(tmp_path, monkeypatch):
    """Use ``tmp_path`` as the cache root and make sure the
    STEALTHFOX_GEOIP_MMDB override is unset. Returns ``tmp_path``."""
    monkeypatch.delenv("STEALTHFOX_GEOIP_MMDB", raising=False)

    def fake_cache_root():
        return tmp_path

    monkeypatch.setattr(dl, "cache_root", fake_cache_root)
    return tmp_path
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cached(root, tag, name=dl.GEOIP_MMDB_NAME):
    """Write a placeholder mmdb at ``<root>/geoip/<tag>/<name>`` and return
    its path."""
    tag_dir = root / "geoip" / tag
    tag_dir.mkdir(parents=True, exist_ok=True)
    mmdb_path = tag_dir / name
    mmdb_path.write_bytes(b"FAKE-MMDB")
    return mmdb_path
|
||||||
|
|
||||||
|
|
||||||
|
def _set_marker_age(root, days):
|
||||||
|
m = root / "geoip" / ".last_check"
|
||||||
|
m.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
m.touch()
|
||||||
|
old = time.time() - days * 86400
|
||||||
|
os.utime(m, (old, old))
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# env override
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
def test_env_override_returns_file(tmp_path, monkeypatch):
    """When STEALTHFOX_GEOIP_MMDB points at an existing file, that path is
    returned."""
    custom_db = tmp_path / "mine.mmdb"
    custom_db.write_bytes(b"X")
    monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(custom_db))
    resolved = dl.ensure_geoip_mmdb()
    assert resolved == custom_db
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_env_override_missing_raises(tmp_path, monkeypatch):
    """When STEALTHFOX_GEOIP_MMDB points at a missing file, RuntimeError is
    raised."""
    missing_path = tmp_path / "nope.mmdb"
    monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(missing_path))
    with pytest.raises(RuntimeError):
        dl.ensure_geoip_mmdb()
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# freshness window
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
def test_fresh_cache_no_network(cache, monkeypatch):
    """Inside the freshness window the cached file is returned and the
    latest-tag API is never called."""
    cached_db = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 0)  # marker says "checked just now"

    def forbidden_api_call():
        raise AssertionError("latest-tag API must NOT be called within the window")

    monkeypatch.setattr(dl, "_latest_geoip_tag", forbidden_api_call)
    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved == cached_db
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_stale_same_tag_no_download(cache, monkeypatch):
    """A stale marker triggers a re-check, but an unchanged latest tag means
    nothing is downloaded."""
    cached_db = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 30)  # older than the window → re-check happens

    def same_tag():
        return "2026.06.03"

    def forbidden_download(*a, **k):
        raise AssertionError("must not download when tag already cached")

    monkeypatch.setattr(dl, "_latest_geoip_tag", same_tag)
    # The real _download_geoip_tag still runs; since the target file exists
    # it must never reach _download_file.
    monkeypatch.setattr(dl, "_download_file", forbidden_download)
    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved == cached_db
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_stale_new_tag_downloads_and_prunes(cache, monkeypatch):
    """A stale marker plus a newer tag fetches the new tag and removes the
    old tag's directory."""
    previous_db = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 30)

    def newer_tag():
        return "2026.06.10"

    def fake_download(tag):
        # Stands in for fetch + extract of the requested tag.
        return _make_cached(cache, tag)

    monkeypatch.setattr(dl, "_latest_geoip_tag", newer_tag)
    monkeypatch.setattr(dl, "_download_geoip_tag", fake_download)
    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved.parent.name == "2026.06.10"
    assert not previous_db.parent.exists()  # old tag directory pruned
    assert resolved.exists()
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
# offline resilience
|
||||||
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
def test_api_down_with_cache_uses_cache(cache, monkeypatch):
    """If the latest-tag API is unreachable, the stale cached file is
    returned without raising."""
    cached_db = _make_cached(cache, "2026.06.03")
    _set_marker_age(cache, 30)

    def api_unreachable():
        raise OSError("offline")

    monkeypatch.setattr(dl, "_latest_geoip_tag", api_unreachable)
    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert resolved == cached_db
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_cold_cache_api_down_falls_back_to_pinned(cache, monkeypatch):
    """With an empty cache and an unreachable API, the pinned
    GEOIP_MMDB_VERSION tag is downloaded."""
    requested = {}

    def api_unreachable():
        raise OSError("offline")

    def fake_download(tag):
        requested["tag"] = tag
        return _make_cached(cache, tag)

    monkeypatch.setattr(dl, "_latest_geoip_tag", api_unreachable)
    monkeypatch.setattr(dl, "_download_geoip_tag", fake_download)
    resolved = dl.ensure_geoip_mmdb(max_age_days=7)
    assert requested["tag"] == dl.GEOIP_MMDB_VERSION
    assert resolved.exists()
|
||||||
|
|
@ -16,24 +16,11 @@ and covers each patched call site:
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from invisible_playwright import InvisiblePlaywright
|
from invisible_playwright import InvisiblePlaywright
|
||||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def firefox_binary():
|
|
||||||
if sys.platform not in BINARY_ENTRY_REL:
|
|
||||||
pytest.skip(f"unsupported platform: {sys.platform}")
|
|
||||||
from invisible_playwright.download import cache_dir_for_version
|
|
||||||
entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
|
|
||||||
if not entry.exists():
|
|
||||||
pytest.skip("patched Firefox binary not cached; run `invisible-playwright fetch`")
|
|
||||||
return str(entry)
|
|
||||||
|
|
||||||
|
|
||||||
def _data_url(html: str) -> str:
|
def _data_url(html: str) -> str:
|
||||||
|
|
@ -195,7 +182,11 @@ def test_hover_triggers_mouseenter(firefox_binary):
|
||||||
"onmouseenter=\"window.__h=true\">x</div>"
|
"onmouseenter=\"window.__h=true\">x</div>"
|
||||||
))
|
))
|
||||||
page.locator("#h").hover()
|
page.locator("#h").hover()
|
||||||
assert page.evaluate("window.__h") is True
|
# Wait for the event rather than reading immediately: under load / on a
|
||||||
|
# virtual display the mouseenter can land a beat after hover() returns,
|
||||||
|
# which made an instant read flaky. wait_for_function still fails (times
|
||||||
|
# out) if mouseenter genuinely never fires.
|
||||||
|
page.wait_for_function("() => window.__h === true", timeout=5000)
|
||||||
|
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────────
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
|
||||||
197
tests/test_proxy_socks_auth_e2e.py
Normal file
197
tests/test_proxy_socks_auth_e2e.py
Normal file
|
|
@ -0,0 +1,197 @@
|
||||||
|
"""E2E: the patched Firefox SENDS SOCKS5 username/password and routes through it.
|
||||||
|
|
||||||
|
Playwright's own ``proxy=`` ignores SOCKS auth; this is the patched
|
||||||
|
``nsProtocolProxyService`` feature (reads ``network.proxy.socks_username`` /
|
||||||
|
``socks_password``). ``test_proxy.py`` already unit-tests on CI that the wrapper
|
||||||
|
sets those prefs; this proves the binary actually performs the RFC1929 auth
|
||||||
|
handshake and relays traffic.
|
||||||
|
|
||||||
|
Fully hermetic — a local SOCKS5 server + a local HTTP target, with the localhost
|
||||||
|
target forced through the proxy via ``allow_hijacking_localhost`` — so it runs
|
||||||
|
identically on a dev box and on a GitHub runner (no external site, no secrets).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import http.server
|
||||||
|
import socket
|
||||||
|
import socketserver
|
||||||
|
import struct
|
||||||
|
import threading
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright import InvisiblePlaywright
|
||||||
|
|
||||||
|
_USER = "ferd_socks_user"
|
||||||
|
_PASS = "ferd_socks_pw_42"
|
||||||
|
|
||||||
|
|
||||||
|
class _Socks5AuthRecorder:
|
||||||
|
"""SOCKS5 that REQUIRES RFC1929 user/pass auth, records the creds it saw,
|
||||||
|
then relays CONNECT to the requested target."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
|
self._srv.bind(("127.0.0.1", 0))
|
||||||
|
self._srv.listen(16)
|
||||||
|
self.port = self._srv.getsockname()[1]
|
||||||
|
self.seen_creds: list[tuple[str, str]] = []
|
||||||
|
self._stop = False
|
||||||
|
threading.Thread(target=self._serve, daemon=True).start()
|
||||||
|
|
||||||
|
def _serve(self):
|
||||||
|
while not self._stop:
|
||||||
|
try:
|
||||||
|
conn, _ = self._srv.accept()
|
||||||
|
except OSError:
|
||||||
|
break
|
||||||
|
threading.Thread(target=self._handle, args=(conn,), daemon=True).start()
|
||||||
|
|
||||||
|
def _recv(self, s, n):
|
||||||
|
buf = b""
|
||||||
|
while len(buf) < n:
|
||||||
|
chunk = s.recv(n - len(buf))
|
||||||
|
if not chunk:
|
||||||
|
return None
|
||||||
|
buf += chunk
|
||||||
|
return buf
|
||||||
|
|
||||||
|
def _handle(self, conn):
|
||||||
|
try:
|
||||||
|
head = self._recv(conn, 2)
|
||||||
|
if not head or head[0] != 0x05:
|
||||||
|
conn.close(); return
|
||||||
|
methods = self._recv(conn, head[1]) or b""
|
||||||
|
if 0x02 not in methods: # we REQUIRE user/pass
|
||||||
|
conn.sendall(b"\x05\xff"); conn.close(); return
|
||||||
|
conn.sendall(b"\x05\x02") # select user/pass auth
|
||||||
|
if not self._recv(conn, 1): # RFC1929 version byte
|
||||||
|
conn.close(); return
|
||||||
|
ulen = self._recv(conn, 1)[0]
|
||||||
|
uname = (self._recv(conn, ulen) or b"").decode("utf-8", "ignore")
|
||||||
|
plen = self._recv(conn, 1)[0]
|
||||||
|
passwd = (self._recv(conn, plen) or b"").decode("utf-8", "ignore")
|
||||||
|
self.seen_creds.append((uname, passwd))
|
||||||
|
conn.sendall(b"\x01\x00") # auth success
|
||||||
|
req = self._recv(conn, 4)
|
||||||
|
if not req:
|
||||||
|
conn.close(); return
|
||||||
|
_, cmd, _, atyp = req
|
||||||
|
if atyp == 0x01:
|
||||||
|
addr = socket.inet_ntoa(self._recv(conn, 4))
|
||||||
|
elif atyp == 0x03:
|
||||||
|
addr = (self._recv(conn, self._recv(conn, 1)[0]) or b"").decode()
|
||||||
|
elif atyp == 0x04:
|
||||||
|
addr = socket.inet_ntop(socket.AF_INET6, self._recv(conn, 16))
|
||||||
|
else:
|
||||||
|
conn.close(); return
|
||||||
|
port = struct.unpack("!H", self._recv(conn, 2))[0]
|
||||||
|
if cmd != 0x01: # only CONNECT
|
||||||
|
conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00"); conn.close(); return
|
||||||
|
try:
|
||||||
|
up = socket.create_connection((addr, port), timeout=15)
|
||||||
|
except OSError:
|
||||||
|
conn.sendall(b"\x05\x05\x00\x01\x00\x00\x00\x00\x00\x00"); conn.close(); return
|
||||||
|
conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00")
|
||||||
|
self._pipe(conn, up)
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
conn.close()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _pipe(a, b):
|
||||||
|
def fwd(src, dst):
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
data = src.recv(65536)
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
dst.sendall(data)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
dst.shutdown(socket.SHUT_WR)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
threading.Thread(target=fwd, args=(a, b), daemon=True).start()
|
||||||
|
fwd(b, a)
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self._stop = True
|
||||||
|
try:
|
||||||
|
self._srv.close()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class _LocalHTTP:
|
||||||
|
"""A tiny localhost HTTP server — the CONNECT target relayed by the proxy."""
|
||||||
|
|
||||||
|
_HTML = b"<!doctype html><title>ok</title><h1 id=ok>socks-routed</h1>"
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
html = self._HTML
|
||||||
|
|
||||||
|
class H(http.server.BaseHTTPRequestHandler):
|
||||||
|
def do_GET(self): # noqa: N802
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/html; charset=utf-8")
|
||||||
|
self.send_header("Content-Length", str(len(html)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(html)
|
||||||
|
|
||||||
|
def log_message(self, *a):
|
||||||
|
pass
|
||||||
|
|
||||||
|
self._srv = socketserver.TCPServer(("127.0.0.1", 0), H)
|
||||||
|
self.port = self._srv.server_address[1]
|
||||||
|
threading.Thread(target=self._srv.serve_forever, daemon=True).start()
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self._srv.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def socks_auth():
    """Yield a running ``_Socks5AuthRecorder``; close it on teardown."""
    recorder = _Socks5AuthRecorder()
    yield recorder
    recorder.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def local_http():
    """Yield a running ``_LocalHTTP`` target server; close it on teardown."""
    target = _LocalHTTP()
    yield target
    target.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_socks5_auth_creds_sent_and_routed(firefox_binary, socks_auth, local_http):
    """The binary has to authenticate to the SOCKS5 proxy with the configured
    username/password and load the page through that proxy."""
    target_url = f"http://127.0.0.1:{local_http.port}/"
    proxy = dict(
        server=f"socks5://127.0.0.1:{socks_auth.port}",
        username=_USER,
        password=_PASS,
    )
    # By default Firefox skips the proxy for localhost; these prefs force
    # the request through it.
    prefs = {
        "network.proxy.allow_hijacking_localhost": True,
        "network.proxy.no_proxies_on": "",
    }
    with InvisiblePlaywright(
        seed=42, binary_path=firefox_binary, proxy=proxy, extra_prefs=prefs
    ) as browser:
        page = browser.new_page()
        page.goto(target_url, wait_until="load", timeout=30000)
        text = page.evaluate("() => document.getElementById('ok').textContent")

    assert text == "socks-routed", "page did not load through the SOCKS proxy"
    creds_seen = (_USER, _PASS) in socks_auth.seen_creds
    assert creds_seen, (
        f"patched Firefox did not send the SOCKS5 auth creds from prefs; "
        f"proxy saw: {socks_auth.seen_creds!r}"
    )
|
||||||
349
tests/test_recaptcha_seed.py
Normal file
349
tests/test_recaptcha_seed.py
Normal file
|
|
@ -0,0 +1,349 @@
|
||||||
|
"""Unit tests for the deterministic reCAPTCHA cookie builder.
|
||||||
|
|
||||||
|
Validates the contract:
|
||||||
|
- 5 .google.com cookies always present (1P_JAR is no longer emitted)
|
||||||
|
- Per-site cookies built from a `browsing_history` list (sampled by the
|
||||||
|
Bayesian network in _fpforge)
|
||||||
|
- Determinism: same (seed, history) → identical content
|
||||||
|
- Chrome 400-day cookie cap respected
|
||||||
|
- Playwright add_cookies field requirements satisfied
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright._recaptcha_seed import (
|
||||||
|
build_cookies,
|
||||||
|
_sub_seed,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
_FIXED_NOW = 1779600000 # 2026-05-23, frozen for determinism
|
||||||
|
|
||||||
|
|
||||||
|
# Sample browsing history for tests (mimics what _fpforge produces).
|
||||||
|
_SAMPLE_HISTORY = [
|
||||||
|
{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"},
|
||||||
|
{"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"},
|
||||||
|
{"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"},
|
||||||
|
{"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 1. Set composition
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_only_google_cookies_when_no_history():
|
||||||
|
"""Empty/None history → only the 5 .google.com cookies (1P_JAR removed
|
||||||
|
in realism round 2 — deprecated by Google 2022)."""
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=None, now=_FIXED_NOW)
|
||||||
|
names = sorted(c["name"] for c in cookies)
|
||||||
|
assert names == sorted(["NID", "CONSENT", "SOCS",
|
||||||
|
"_GRECAPTCHA", "ENID"])
|
||||||
|
assert all(c["domain"] == ".google.com" for c in cookies)
|
||||||
|
|
||||||
|
|
||||||
|
def test_browsing_history_adds_host_cookies():
|
||||||
|
"""Each history site contributes 1+ cookies on its domain."""
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
google = [c for c in cookies if c["domain"] == ".google.com"]
|
||||||
|
assert len(google) == 5 # 1P_JAR removed
|
||||||
|
|
||||||
|
domains = {c["domain"] for c in cookies if c["domain"] != ".google.com"}
|
||||||
|
for site in _SAMPLE_HISTORY:
|
||||||
|
assert f".{site['name']}" in domains
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_dot_prefix_normalized():
|
||||||
|
"""All host cookie domains have a leading dot for sub-domain coverage."""
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
for c in cookies:
|
||||||
|
assert c["domain"].startswith("."), f"missing dot: {c['domain']}"
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 2. Cookie profile recipes (each profile yields the expected cookie set)
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_profile_minimal_yields_ga_only():
|
||||||
|
history = [{"name": "x.com", "cookie_profile": "minimal"}]
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
|
||||||
|
host = [c for c in cookies if c["domain"] == ".x.com"]
|
||||||
|
names = [c["name"] for c in host]
|
||||||
|
assert names == ["_ga"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_profile_ga_only_yields_ga_and_gid():
|
||||||
|
history = [{"name": "x.com", "cookie_profile": "ga_only"}]
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
|
||||||
|
host = [c for c in cookies if c["domain"] == ".x.com"]
|
||||||
|
names = sorted(c["name"] for c in host)
|
||||||
|
assert names == ["_ga", "_gid"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_profile_ga_cf_yields_ga_and_cf_bm():
|
||||||
|
history = [{"name": "x.com", "cookie_profile": "ga_cf"}]
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
|
||||||
|
host = [c for c in cookies if c["domain"] == ".x.com"]
|
||||||
|
names = sorted(c["name"] for c in host)
|
||||||
|
assert names == ["__cf_bm", "_ga"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_profile_ga_consent_yields_three_cookies():
|
||||||
|
history = [{"name": "x.com", "cookie_profile": "ga_consent"}]
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
|
||||||
|
host = [c for c in cookies if c["domain"] == ".x.com"]
|
||||||
|
names = sorted(c["name"] for c in host)
|
||||||
|
# Always _ga + _gid + one of OneTrust|CookieYes
|
||||||
|
assert "_ga" in names and "_gid" in names
|
||||||
|
assert any(n in names for n in ("OptanonAlertBoxClosed", "cookieyes-consent"))
|
||||||
|
assert len(host) == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_profile_ga_consent_clarity_yields_at_least_four_cookies():
|
||||||
|
"""Always _ga + _gid + _clck + consent banner. Optionally _fbp, _dc_gtm_*,
|
||||||
|
__hssrc (probabilistic per rng — see test_new_helper_cookies_*)."""
|
||||||
|
history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}]
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
|
||||||
|
host = [c for c in cookies if c["domain"] == ".x.com"]
|
||||||
|
names = sorted(c["name"] for c in host)
|
||||||
|
assert "_ga" in names and "_gid" in names and "_clck" in names
|
||||||
|
assert any(n in names for n in ("OptanonAlertBoxClosed", "cookieyes-consent"))
|
||||||
|
assert len(host) >= 4 # 4 baseline + 0-3 helpers
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_profile_falls_back_to_ga():
|
||||||
|
history = [{"name": "x.com", "cookie_profile": "nonexistent_profile"}]
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW)
|
||||||
|
host = [c for c in cookies if c["domain"] == ".x.com"]
|
||||||
|
assert [c["name"] for c in host] == ["_ga"]
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 3. Determinism
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_same_seed_and_history_same_content():
|
||||||
|
a = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
b = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
assert a == b
|
||||||
|
|
||||||
|
|
||||||
|
def test_different_seed_different_content():
|
||||||
|
a = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
b = build_cookies(seed=99, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
a_nid = next(c for c in a if c["name"] == "NID")["value"]
|
||||||
|
b_nid = next(c for c in b if c["name"] == "NID")["value"]
|
||||||
|
assert a_nid != b_nid
|
||||||
|
|
||||||
|
|
||||||
|
def test_history_order_does_not_affect_domain_specific_cookies():
|
||||||
|
"""Sub-seed is keyed on domain name, not order in history list."""
|
||||||
|
h1 = [_SAMPLE_HISTORY[0], _SAMPLE_HISTORY[1]]
|
||||||
|
h2 = [_SAMPLE_HISTORY[1], _SAMPLE_HISTORY[0]]
|
||||||
|
a = {(c["domain"], c["name"]): c["value"]
|
||||||
|
for c in build_cookies(seed=42, browsing_history=h1, now=_FIXED_NOW)
|
||||||
|
if c["domain"] != ".google.com"}
|
||||||
|
b = {(c["domain"], c["name"]): c["value"]
|
||||||
|
for c in build_cookies(seed=42, browsing_history=h2, now=_FIXED_NOW)
|
||||||
|
if c["domain"] != ".google.com"}
|
||||||
|
assert a == b
|
||||||
|
|
||||||
|
|
||||||
|
def test_sub_seed_distinct_tags_distinct_streams():
|
||||||
|
assert _sub_seed(42, "google") != _sub_seed(42, "dom:github.com")
|
||||||
|
assert _sub_seed(42, "dom:github.com") != _sub_seed(42, "dom:amazon.com")
|
||||||
|
assert _sub_seed(0, "any") != 0 # seed=0 still produces non-zero sub-seed
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 4. Format / structural correctness for the Google batch
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_nid_format():
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW)
|
||||||
|
nid = next(c for c in cookies if c["name"] == "NID")
|
||||||
|
prefix, b64 = nid["value"].split("=", 1)
|
||||||
|
assert prefix.isdigit() and len(prefix) == 3
|
||||||
|
# Broadened to 100-540 in realism round 2 to cover historical NID versions
|
||||||
|
assert 100 <= int(prefix) <= 540
|
||||||
|
assert len(b64) == 178
|
||||||
|
|
||||||
|
|
||||||
|
def test_consent_format():
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW)
|
||||||
|
consent = next(c for c in cookies if c["name"] == "CONSENT")
|
||||||
|
assert consent["value"].startswith("YES+cb.")
|
||||||
|
assert "+FX+" in consent["value"]
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 5. Chrome 400-day cookie cap compliance
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_all_expiries_within_400_day_cap():
|
||||||
|
"""Chrome 104+ caps cookie expiry to 400 days. Cookies > 400d silently
|
||||||
|
truncated / dropped. We tighten everything to <=395d (except __cf_bm
|
||||||
|
which is short-lived telemetry)."""
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
max_allowed = _FIXED_NOW + 400 * 86400
|
||||||
|
for c in cookies:
|
||||||
|
# Short-lived telemetry cookies are fine
|
||||||
|
if c["name"] in ("__cf_bm", "1P_JAR", "_gid"):
|
||||||
|
continue
|
||||||
|
assert c["expires"] <= max_allowed, (
|
||||||
|
f"Cookie {c['name']} expires {c['expires'] - _FIXED_NOW}s "
|
||||||
|
f"(> 400d cap) — would be silently dropped"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 6. Playwright add_cookies field requirements
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_all_cookies_have_required_playwright_fields():
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
for c in cookies:
|
||||||
|
assert c.get("name"), f"missing name: {c}"
|
||||||
|
assert c.get("value") is not None, f"missing value: {c}"
|
||||||
|
assert c.get("domain"), f"missing domain: {c}"
|
||||||
|
assert c.get("path") == "/", f"path != / for {c['name']}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_modern_cookies_marked_secure():
|
||||||
|
"""Cookies with sameSite=None require secure=True under Firefox/Chrome.
|
||||||
|
Also generally needed for cookies set via Playwright add_cookies without
|
||||||
|
a navigation context."""
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
for c in cookies:
|
||||||
|
if c.get("sameSite") == "None":
|
||||||
|
assert c.get("secure") is True, f"{c['name']} None+!secure invalid"
|
||||||
|
|
||||||
|
|
||||||
|
def test_httponly_on_signed_cookies():
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW)
|
||||||
|
nid = next(c for c in cookies if c["name"] == "NID")
|
||||||
|
enid = next(c for c in cookies if c["name"] == "ENID")
|
||||||
|
assert nid.get("httpOnly") is True
|
||||||
|
assert enid.get("httpOnly") is True
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 7. End-to-end with real fpforge Profile
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_with_real_fpforge_profile():
    """End-to-end: generate a real Profile, ensure browsing_history is populated
    and build_cookies works against it.

    The lower bound on the cookie count uses the 5 always-present .google.com
    cookies (1P_JAR is no longer emitted) plus at least one cookie per visited
    site, matching what the set-composition tests in this file assert.
    """
    from invisible_playwright._fpforge import generate_profile

    prof = generate_profile(seed=42)
    assert isinstance(prof.browsing_history, list)
    # The Bayesian network samples ~15-30 sites per persona
    assert 5 <= len(prof.browsing_history) <= 50, \
        f"unexpected history length: {len(prof.browsing_history)}"
    # Each entry has the expected fields
    for site in prof.browsing_history:
        assert "name" in site and "category" in site and "cookie_profile" in site
    # build_cookies works against the real profile
    cookies = build_cookies(seed=prof.seed, browsing_history=prof.browsing_history,
                            now=_FIXED_NOW)
    # 5 google + at least 1 cookie per visited site. The previous bound of 6
    # dated from when 1P_JAR was still generated and only held by accident
    # (some sites contribute more than one cookie).
    assert len(cookies) >= 5 + len(prof.browsing_history)
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_seed_same_browsing_history_via_fpforge():
|
||||||
|
"""Profile.browsing_history is deterministic from seed (Bayesian sampler)."""
|
||||||
|
from invisible_playwright._fpforge import generate_profile
|
||||||
|
a = generate_profile(seed=42).browsing_history
|
||||||
|
b = generate_profile(seed=42).browsing_history
|
||||||
|
assert a == b
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# 8. Realism improvements (2026-05-24 round 2)
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
def test_no_1p_jar_cookie():
|
||||||
|
"""1P_JAR was deprecated by Google in 2022. Including it is an
|
||||||
|
anachronism flag for fingerprinters that look at cookie freshness."""
|
||||||
|
cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW)
|
||||||
|
names = {c["name"] for c in cookies}
|
||||||
|
assert "1P_JAR" not in names
|
||||||
|
|
||||||
|
|
||||||
|
def test_nid_prefix_broadened_range():
|
||||||
|
"""NID 3-digit prefix should cover historical versions (137/105/511/525
|
||||||
|
seen in real captures) — range 100-540, not just 500-540."""
|
||||||
|
seen_prefixes = set()
|
||||||
|
for seed in range(200):
|
||||||
|
cookies = build_cookies(seed=seed, now=_FIXED_NOW)
|
||||||
|
nid = next(c for c in cookies if c["name"] == "NID")
|
||||||
|
prefix = int(nid["value"].split("=", 1)[0])
|
||||||
|
seen_prefixes.add(prefix)
|
||||||
|
assert min(seen_prefixes) < 500, f"NID range never goes below 500 ({sorted(seen_prefixes)[:5]})"
|
||||||
|
assert max(seen_prefixes) <= 540
|
||||||
|
|
||||||
|
|
||||||
|
def test_consent_lang_from_timezone_eu():
|
||||||
|
"""CONSENT cookie's `lang+region` token derived from IANA timezone."""
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Rome")
|
||||||
|
consent = next(c for c in cookies if c["name"] == "CONSENT")
|
||||||
|
assert ".it+IT+" in consent["value"], f"expected it+IT in: {consent['value']}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_consent_lang_default_fx():
|
||||||
|
"""Unknown / US timezone → default `en+FX` (non-EU fallback)."""
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="America/New_York")
|
||||||
|
consent = next(c for c in cookies if c["name"] == "CONSENT")
|
||||||
|
assert ".en+FX+" in consent["value"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_consent_lang_de_for_berlin():
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Berlin")
|
||||||
|
consent = next(c for c in cookies if c["name"] == "CONSENT")
|
||||||
|
assert ".de+DE+" in consent["value"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_consent_lang_no_timezone_default():
|
||||||
|
"""timezone=None → default en+FX."""
|
||||||
|
cookies = build_cookies(seed=42, now=_FIXED_NOW)
|
||||||
|
consent = next(c for c in cookies if c["name"] == "CONSENT")
|
||||||
|
assert ".en+FX+" in consent["value"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_new_helper_cookies_appear_in_ga_consent_clarity():
    """The ga_consent_clarity recipe adds _fbp, _dc_gtm_* and __hssrc only
    probabilistically (per rng), so sweep 100 seeds and require that each
    helper shows up at least once on the site's domain."""
    history = [{"name": "site.com", "cookie_profile": "ga_consent_clarity"}]
    # Union of every cookie name ever seen on .site.com across the sweep.
    observed = set()
    for seed in range(100):
        jar = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW)
        observed.update(c["name"] for c in jar if c["domain"] == ".site.com")

    assert "_fbp" in observed, "_fbp never appeared in 100 seeds (rng pick broken)"
    assert any(name.startswith("_dc_gtm_") for name in observed), \
        "_dc_gtm_* never appeared in 100 seeds"
    assert "__hssrc" in observed, "__hssrc never appeared in 100 seeds"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fbp_format():
    """_fbp format: fb.<idx>.<unix_ms>.<random_int>

    _fbp is emitted only for some seeds, so scan seeds 0..19 and validate the
    first one found; finding none at all is itself a failure.
    """
    history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}]
    for seed in range(20):
        jar = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW)
        hits = [c for c in jar if c["name"] == "_fbp"]
        if not hits:
            continue
        pieces = hits[0]["value"].split(".")
        assert pieces[0] == "fb"
        assert pieces[1].isdigit()
        assert pieces[2].isdigit() and len(pieces[2]) >= 13  # unix ms
        assert pieces[3].isdigit()
        return
    raise AssertionError("never got _fbp across 20 seeds — distribution broken")
|
||||||
|
|
@ -22,35 +22,12 @@ For dev iteration:
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import http.server
|
import http.server
|
||||||
import os
|
|
||||||
import socketserver
|
import socketserver
|
||||||
import sys
|
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from invisible_playwright import InvisiblePlaywright
|
from invisible_playwright import InvisiblePlaywright
|
||||||
from invisible_playwright.constants import BINARY_ENTRY_REL
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
|
||||||
def firefox_binary():
|
|
||||||
env_path = os.environ.get("INVPW_BINARY_PATH")
|
|
||||||
if env_path:
|
|
||||||
from pathlib import Path
|
|
||||||
if Path(env_path).exists():
|
|
||||||
return env_path
|
|
||||||
pytest.skip(f"INVPW_BINARY_PATH={env_path!r} does not exist")
|
|
||||||
if sys.platform not in BINARY_ENTRY_REL:
|
|
||||||
pytest.skip(f"unsupported platform: {sys.platform}")
|
|
||||||
from invisible_playwright.download import cache_dir_for_version
|
|
||||||
entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
|
|
||||||
if not entry.exists():
|
|
||||||
pytest.skip(
|
|
||||||
"patched Firefox not cached; run `python -m invisible_playwright fetch` "
|
|
||||||
"or set INVPW_BINARY_PATH"
|
|
||||||
)
|
|
||||||
return str(entry)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
103
tests/test_version.py
Normal file
103
tests/test_version.py
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
"""Regression tests for issue #24: CLI version reporting.
|
||||||
|
|
||||||
|
Two distinct symptoms reported by `i43-j`:
|
||||||
|
1. `python -m invisible_playwright --version` errored out (only the
|
||||||
|
`version` subcommand worked).
|
||||||
|
2. `python -m invisible_playwright version` printed the literal string
|
||||||
|
"0.1.0" regardless of the installed version (a stale hardcoded
|
||||||
|
`__version__` in __init__.py that nobody had remembered to bump).
|
||||||
|
|
||||||
|
These tests pin down both behaviours so the regressions don't sneak back
|
||||||
|
in via a future copy/paste.
|
||||||
|
"""
|
||||||
|
import io
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from contextlib import redirect_stdout
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import invisible_playwright
|
||||||
|
from invisible_playwright import __version__, cli
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_matches_installed_package_metadata():
|
||||||
|
"""__version__ must come from importlib.metadata, not a hardcoded literal,
|
||||||
|
so it can never drift from the pyproject.toml `version` field."""
|
||||||
|
from importlib.metadata import version as pkg_version
|
||||||
|
assert __version__ == pkg_version("invisible-playwright")
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_is_not_the_stale_010_string():
|
||||||
|
"""Issue #24 regression: __version__ used to be hardcoded as '0.1.0'
|
||||||
|
and never updated. If this ever returns to a literal '0.1.0' the
|
||||||
|
package has been published or shipped with stale metadata."""
|
||||||
|
assert __version__ != "0.1.0", (
|
||||||
|
"__version__ is the stale hardcoded '0.1.0' string — issue #24 has "
|
||||||
|
"regressed. Use importlib.metadata to derive it from pyproject.toml."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_subcommand_prints_real_version():
    """`invisible-playwright version` must print the actual installed version,
    not the old hardcoded '0.1.0'.

    Runs ``cli.main(["version"])`` in-process with stdout captured and checks
    the exit code, the package version line, and that the binary / Firefox
    version markers are present.
    """
    buf = io.StringIO()
    with redirect_stdout(buf):
        rc = cli.main(["version"])
    assert rc == 0
    out = buf.getvalue()
    assert f"invisible_playwright {__version__}" in out
    # Stale-literal guard, anchored to the package name. Searching the whole
    # output for "0.1.0" gives false failures because the same output also
    # carries BINARY_VERSION and a Firefox version, which may legitimately
    # contain that substring (e.g. "140.1.0").
    stale_line = re.search(r"invisible_playwright 0\.1\.0(?![\w.])", out)
    assert stale_line is None or __version__ == "0.1.0"  # only allowed if truly 0.1.0
    assert "BINARY_VERSION=" in out
    assert "Firefox " in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_dash_dash_version_flag_works():
|
||||||
|
"""Issue #24 reporter: `python -m invisible_playwright --version` used to
|
||||||
|
error with 'the following arguments are required: cmd' because there was
|
||||||
|
no top-level --version flag, only the `version` subcommand. Now the
|
||||||
|
Python convention works too."""
|
||||||
|
# argparse's --version action calls sys.exit(0) directly, so use subprocess.
|
||||||
|
r = subprocess.run(
|
||||||
|
[sys.executable, "-m", "invisible_playwright", "--version"],
|
||||||
|
capture_output=True, text=True, timeout=15,
|
||||||
|
)
|
||||||
|
assert r.returncode == 0, f"--version returned {r.returncode}, stderr={r.stderr!r}"
|
||||||
|
# argparse may emit on stdout or stderr depending on version
|
||||||
|
combined = r.stdout + r.stderr
|
||||||
|
assert "invisible_playwright" in combined
|
||||||
|
assert __version__ in combined
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_args_prints_help_not_traceback():
|
||||||
|
"""`python -m invisible_playwright` with no args should be graceful
|
||||||
|
(print help, exit non-zero) rather than crashing with a traceback."""
|
||||||
|
r = subprocess.run(
|
||||||
|
[sys.executable, "-m", "invisible_playwright"],
|
||||||
|
capture_output=True, text=True, timeout=15,
|
||||||
|
)
|
||||||
|
# Either prints help (rc=2) or shows usage. Must NOT contain a traceback.
|
||||||
|
assert "Traceback" not in (r.stdout + r.stderr)
|
||||||
|
assert "usage:" in (r.stdout + r.stderr).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_dash_V_short_flag_works():
|
||||||
|
"""Alias `-V` for `--version` (Python convention)."""
|
||||||
|
r = subprocess.run(
|
||||||
|
[sys.executable, "-m", "invisible_playwright", "-V"],
|
||||||
|
capture_output=True, text=True, timeout=15,
|
||||||
|
)
|
||||||
|
assert r.returncode == 0
|
||||||
|
assert __version__ in (r.stdout + r.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_matches_semver_shape():
|
||||||
|
"""Sanity: version should look like a semver (digits.digits.digits)
|
||||||
|
or a PEP-440 dev marker, not a placeholder string."""
|
||||||
|
assert re.match(r"^\d+\.\d+\.\d+", __version__), (
|
||||||
|
f"__version__ {__version__!r} doesn't look like a real version"
|
||||||
|
)
|
||||||
453
tests/test_webrtc_realness.py
Normal file
453
tests/test_webrtc_realness.py
Normal file
|
|
@ -0,0 +1,453 @@
|
||||||
|
"""WebRTC realness regression tests.
|
||||||
|
|
||||||
|
Two layers, both runnable on GitHub CI:
|
||||||
|
|
||||||
|
* **unit** (`@pytest.mark.unit`) — pure SDP/candidate assertions against golden
|
||||||
|
samples. No browser, no proxy, no network. These lock in every rule we found
|
||||||
|
on 2026-06-06: host must be mDNS ``.local``; the synthetic srflx must carry the
|
||||||
|
egress IP with a GENUINE nICEr priority (never ``local_pref == 0xFFFF``) and a
|
||||||
|
stable, distinct foundation; CreepJS's resolver must return the egress, and a
|
||||||
|
host-only SDP must read as "blocked". They run in the standard ``tests.yml``.
|
||||||
|
|
||||||
|
* **e2e** (`@pytest.mark.e2e`) — launch the patched binary and verify the live
|
||||||
|
ICE gather. "Being behind a proxy" is faked WITHOUT smartproxy:
|
||||||
|
- the egress IP is injected via ``STEALTHFOX_WEBRTC_PUBLIC_IP`` (RFC 5737
|
||||||
|
TEST-NET, so it never collides with a real IP);
|
||||||
|
- the "behind a TCP-only SOCKS proxy" condition is reproduced by a tiny
|
||||||
|
in-process SOCKS5 server that relays TCP CONNECT but refuses UDP ASSOCIATE
|
||||||
|
(exactly a residential TCP-only proxy → WebRTC's default-route UDP probe
|
||||||
|
fails → exercises the Fix C fallback). No credentials, no external proxy.
|
||||||
|
Excluded from the default run; a binary is located via ``STEALTHFOX_E2E_BINARY``
|
||||||
|
(or the locally-built tree), else the test skips.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import select
|
||||||
|
import socket
|
||||||
|
import struct
|
||||||
|
import threading
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Pure SDP / ICE-candidate helpers (no I/O) — the heart of the sentinels.
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# One ICE candidate line: foundation, component, transport, priority, address,
# port and type, plus an optional trailing "raddr <addr> rport <port>" pair.
_CAND = re.compile(
    r"candidate:(?P<foundation>\S+)\s+(?P<component>\d+)\s+(?P<proto>UDP|TCP|udp|tcp)\s+"
    r"(?P<priority>\d+)\s+(?P<address>\S+)\s+(?P<port>\d+)\s+typ\s+(?P<typ>\w+)"
    r"(?:.*?raddr\s+(?P<raddr>\S+)\s+rport\s+(?P<rport>\d+))?"
)


def parse_candidate(line):
    """Parse one ``a=candidate:`` / ``candidate:`` line into a dict (or None).

    Returns None when the line does not match. Otherwise the dict holds the
    named regex groups, with ``component``, ``priority`` and ``port`` converted
    to int, ``proto`` upper-cased, and ``rport`` converted to int when present
    (``raddr`` / ``rport`` stay None if the line has no raddr/rport pair).
    """
    match = _CAND.search(line)
    if match is None:
        return None
    fields = match.groupdict()
    for numeric_key in ("component", "priority", "port"):
        fields[numeric_key] = int(fields[numeric_key])
    fields["proto"] = fields["proto"].upper()
    rport = fields["rport"]
    if rport is not None:
        fields["rport"] = int(rport)
    return fields
|
||||||
|
|
||||||
|
|
||||||
|
def decode_priority(prio):
    """Split a candidate priority into nICEr's fields (RFC 5245 layout that
    nICEr emits: type<<24 | iface<<16 | dir<<13 | stun<<8 | (256-component)).

    ``local_pref`` is the whole 16-bit field in bits 8-23; ``iface_pref``,
    ``direction`` and ``stun_priority`` are sub-fields of that same range, so
    they overlap it rather than being additional bits.
    """
    local_pref = (prio >> 8) & 0xFFFF
    decoded = {}
    decoded["type_pref"] = (prio >> 24) & 0xFF
    decoded["iface_pref"] = local_pref >> 8          # top byte of local_pref
    decoded["local_pref"] = local_pref
    decoded["direction"] = (local_pref >> 5) & 0x7   # bits 13-15 of prio
    decoded["stun_priority"] = local_pref & 0x1F     # bits 8-12 of prio
    decoded["component"] = 256 - (prio & 0xFF)
    return decoded
|
||||||
|
|
||||||
|
|
||||||
|
def is_mdns(addr):
    """Return True when ``addr`` is non-empty and its string form ends with
    ``.local``; falsy input (None, empty string) yields False."""
    if not addr:
        return False
    return str(addr).endswith(".local")
|
||||||
|
|
||||||
|
|
||||||
|
def candidates(sdp_or_lines):
    """Return the parsed candidates found in an SDP string or an iterable of lines.

    A str is scanned for ``candidate:`` lines (with or without the ``a=``
    prefix); any other input is treated as an iterable of candidate lines.
    Lines that ``parse_candidate`` cannot parse are dropped.
    """
    if isinstance(sdp_or_lines, str):
        raw_lines = re.findall(r"(?:a=)?candidate:[^\r\n]*", sdp_or_lines)
    else:
        raw_lines = list(sdp_or_lines)

    parsed = []
    for raw in raw_lines:
        cand = parse_candidate(raw)
        if cand:
            parsed.append(cand)
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def host_candidates(cands):
    """Return, in order, the candidates whose ``typ`` is ``host``."""
    return list(filter(lambda cand: cand["typ"] == "host", cands))
|
||||||
|
|
||||||
|
|
||||||
|
def srflx_candidates(cands):
    """Return, in order, the candidates whose ``typ`` is ``srflx``."""
    return list(filter(lambda cand: cand["typ"] == "srflx", cands))
|
||||||
|
|
||||||
|
|
||||||
|
def host_is_mdns(cands):
    """Every host candidate must be a ``<uuid>.local`` mDNS name, never a raw
    LAN IP (the §9.4 leak form that fails BrowserLeaks).

    Returns False when there are no host candidates at all.
    """
    hosts = host_candidates(cands)
    if not hosts:
        return False
    return all(is_mdns(host["address"]) for host in hosts)
|
||||||
|
|
||||||
|
|
||||||
|
def srflx_realness(cand, expected_ip=None):
    """Return (ok, reasons) for whether ``cand`` looks like a GENUINE nICEr UDP
    server-reflexive candidate. Encodes the 2026-06-06 findings.

    ``reasons`` collects one message per failed check and ``ok`` is True only
    when it is empty. A candidate that is not srflx returns immediately with
    that single reason. ``expected_ip`` is compared against the candidate
    address only when it is not None.
    """
    if cand["typ"] != "srflx":
        return False, ["not a srflx candidate"]

    problems = []

    if expected_ip is not None and cand["address"] != expected_ip:
        problems.append(f"address {cand['address']} != expected {expected_ip}")

    fields = decode_priority(cand["priority"])
    type_pref = fields["type_pref"]
    local_pref = fields["local_pref"]
    stun_priority = fields["stun_priority"]

    if type_pref != 100:
        problems.append(f"type_pref {type_pref} != 100 (SRV_RFLX)")

    if local_pref == 0xFFFF:
        problems.append("local_pref == 0xFFFF — impossible nICEr value (the old hardcoded tell)")
    elif local_pref < 0x7000 or local_pref >= 0x8000:
        # The accepted window is 0x7000..0x7FFF, wider than the band the message names.
        problems.append(f"local_pref {local_pref} outside the genuine ~0x7E00-0x7FFF band")

    if stun_priority < 16 or stun_priority > 31:
        problems.append(f"stun_priority {stun_priority} implausible (expect 31-server_id)")

    # A missing raddr is accepted; a present one must be 0.0.0.0.
    raddr = cand.get("raddr")
    if raddr not in (None, "0.0.0.0"):
        problems.append(f"raddr {raddr} not redacted to 0.0.0.0")

    return len(problems) == 0, problems
|
||||||
|
|
||||||
|
|
||||||
|
def creep_get_ipaddress(sdp):
    """Faithful port of CreepJS's getIPAddress(sdp): connection line first, then
    the first candidate IP; '0.0.0.0' counts as blocked. Returns None if blocked
    — i.e. exactly what makes CreepJS render 'stun connection: blocked'.
    """
    blocked = "0.0.0.0"

    # Step 1: third space-separated token of the first "c=IN ..." line.
    conn_lines = re.findall(r"c=IN\s.+\s", sdp)
    first_conn = conn_lines[0] if conn_lines else ""
    tokens = first_conn.strip().split(" ")
    conn_ip = tokens[2] if len(tokens) > 2 else ""
    if conn_ip and conn_ip != blocked:
        return conn_ip

    # Step 2: first "<udp|tcp> <token> <address>" whose address consists only of
    # word characters, dots and colons and is followed by whitespace.
    found = re.search(r"(udp|tcp)\s(?:\d|\w)+\s((?:\d|\w|\.|:)+)(?=\s)", sdp, re.I)
    if found is None:
        return None
    candidate_ip = found.group(2)
    if candidate_ip == blocked:
        return None
    return candidate_ip
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Golden samples — real priority/foundation values, TEST-NET IPs (RFC 5737)
|
||||||
|
# so no real address is ever committed (feedback_pre_push_privacy_check).
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
HOST_MDNS = "candidate:0 1 UDP 2122252543 1460e928-16b3-4c66-80ad-04abcdef0000.local 54551 typ host"
|
||||||
|
HOST_RAW_IP = "candidate:0 1 UDP 2122252543 192.168.1.20 54551 typ host" # §9.4 leak form
|
||||||
|
VANILLA_SRFLX = "candidate:1 1 UDP 1685987327 203.0.113.50 3755 typ srflx raddr 0.0.0.0 rport 0"
|
||||||
|
OURS_SRFLX = "candidate:1 1 UDP 1686052863 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0"
|
||||||
|
# Pre-fix injection: local_pref hardcoded to 0xFFFF (priority 1694498815). The tell.
|
||||||
|
OLD_BAD_SRFLX = "candidate:2 1 UDP 1694498815 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0"
|
||||||
|
|
||||||
|
SDP_GOOD = (
|
||||||
|
"v=0\r\nc=IN IP4 0.0.0.0\r\n"
|
||||||
|
f"a={HOST_MDNS}\r\na={OURS_SRFLX}\r\n"
|
||||||
|
)
|
||||||
|
SDP_BLOCKED = "v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n" # host-only, no srflx
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# UNIT sentinels (run on GitHub CI)
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_parse_and_decode_basics():
|
||||||
|
c = parse_candidate(OURS_SRFLX)
|
||||||
|
assert c["typ"] == "srflx" and c["proto"] == "UDP"
|
||||||
|
assert c["address"] == "203.0.113.7" and c["raddr"] == "0.0.0.0" and c["rport"] == 0
|
||||||
|
p = decode_priority(c["priority"])
|
||||||
|
assert p["type_pref"] == 100 and p["stun_priority"] == 31 and p["component"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_genuine_srflx_passes():
|
||||||
|
for line in (VANILLA_SRFLX, OURS_SRFLX):
|
||||||
|
ok, reasons = srflx_realness(parse_candidate(line), expected_ip=parse_candidate(line)["address"])
|
||||||
|
assert ok, reasons
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
|
||||||
|
def test_old_0xffff_srflx_is_rejected():
|
||||||
|
"""Fix A sentinel: local_pref == 0xFFFF must be flagged as fake."""
|
||||||
|
ok, reasons = srflx_realness(parse_candidate(OLD_BAD_SRFLX))
|
||||||
|
assert not ok
|
||||||
|
assert any("0xFFFF" in r for r in reasons), reasons
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_host_must_be_mdns_not_raw_ip():
    """§9.4 sentinel: raw-IP host candidate is a leak; .local is required."""
    obfuscated = candidates([HOST_MDNS])
    leaking = candidates([HOST_RAW_IP])
    assert host_is_mdns(obfuscated) is True
    assert host_is_mdns(leaking) is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_srflx_foundation_distinct_from_host():
    """Fix B sentinel: srflx foundation must differ from the host foundations."""
    parsed = candidates([HOST_MDNS, OURS_SRFLX])
    host_foundations = set()
    for cand in host_candidates(parsed):
        host_foundations.add(cand["foundation"])
    srflx_foundations = set()
    for cand in srflx_candidates(parsed):
        srflx_foundations.add(cand["foundation"])
    # Non-empty AND sharing nothing with the host set.
    assert srflx_foundations and srflx_foundations.isdisjoint(host_foundations)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_creep_resolver_returns_egress_when_srflx_present():
    """With an srflx line in the SDP the resolver reports the srflx address."""
    resolved = creep_get_ipaddress(SDP_GOOD)
    assert resolved == "203.0.113.7"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_creep_resolver_reports_blocked_for_host_only():
    """Host-only (.local) SDP has no public IP, which CreepJS shows as 'blocked'.

    This is the exact false-green that was shipped; the resolver must return
    None for such an SDP.
    """
    resolved = creep_get_ipaddress(SDP_BLOCKED)
    assert resolved is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_mdns_host_is_invisible_to_creep_resolver():
    """A .local host must NOT be mis-read as an IP.

    The hyphen in the UUID is what makes CreepJS skip it and fall through to
    the srflx.
    """
    host_only_sdp = "v=0\r\nc=IN IP4 0.0.0.0\r\n" + f"a={HOST_MDNS}\r\n"
    assert creep_get_ipaddress(host_only_sdp) is None
|
||||||
|
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
# Fake-proxy infrastructure for e2e: a tiny TCP-only SOCKS5 server.
|
||||||
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
class _Socks5TcpOnly:
    """Minimal SOCKS5: no-auth, CONNECT (TCP) relayed, UDP ASSOCIATE refused.

    Reproduces a residential TCP-only proxy: pages load over TCP, but WebRTC's
    UDP path is dead — which (for a no-camera page in default_address_only mode)
    is exactly what made the default-route probe fail and ICE return zero
    candidates before Fix C.

    The server binds an ephemeral port on 127.0.0.1 and starts accepting in a
    daemon thread as soon as it is constructed. ``port`` is the bound port and
    ``udp_associate_attempts`` counts refused UDP ASSOCIATE (cmd 0x03) requests.
    """

    # Fixed 10-byte replies: VER, REP, RSV, ATYP=IPv4, BND.ADDR=0.0.0.0, BND.PORT=0.
    _REPLY_SUCCESS = b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00"
    _REPLY_HOST_UNREACHABLE = b"\x05\x04\x00\x01\x00\x00\x00\x00\x00\x00"
    _REPLY_CMD_NOT_SUPPORTED = b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00"

    def __init__(self):
        self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self._srv.bind(("127.0.0.1", 0))
        self._srv.listen(16)
        self.port = self._srv.getsockname()[1]
        self.udp_associate_attempts = 0
        self._stop = False
        self._t = threading.Thread(target=self._serve, daemon=True)
        self._t.start()

    def _serve(self):
        """Accept loop: one daemon handler thread per client connection."""
        while not self._stop:
            try:
                conn, _ = self._srv.accept()
            except OSError:
                # Listening socket was closed (see close()) or accept failed.
                break
            threading.Thread(target=self._handle, args=(conn,), daemon=True).start()

    def _recv_exact(self, sock, n):
        """Read exactly ``n`` bytes from ``sock``; return None if the peer closes first."""
        buf = b""
        while len(buf) < n:
            chunk = sock.recv(n - len(buf))
            if not chunk:
                return None
            buf += chunk
        return buf

    def _read_addr(self, conn, atyp):
        """Read DST.ADDR for address type ``atyp``.

        Returns the address as text, or None on a short read or an unknown
        address type.
        """
        if atyp == 0x01:  # IPv4
            raw = self._recv_exact(conn, 4)
            return None if raw is None else socket.inet_ntoa(raw)
        if atyp == 0x03:  # domain name, length-prefixed
            length = self._recv_exact(conn, 1)
            if length is None:
                return None
            raw = self._recv_exact(conn, length[0])
            return None if raw is None else raw.decode("ascii", "ignore")
        if atyp == 0x04:  # IPv6
            raw = self._recv_exact(conn, 16)
            return None if raw is None else socket.inet_ntop(socket.AF_INET6, raw)
        return None

    def _handle(self, conn):
        """Run one SOCKS5 session: greeting, request, then refuse or relay.

        Every exit path closes ``conn`` (and the upstream socket if one was
        opened); errors are swallowed on purpose because this is a best-effort
        test double running in a daemon thread.
        """
        upstream = None
        try:
            head = self._recv_exact(conn, 2)
            if not head or head[0] != 0x05:
                return
            # Discard the offered auth methods; we always pick no-auth.
            if self._recv_exact(conn, head[1]) is None:
                return
            conn.sendall(b"\x05\x00")  # no-auth
            req = self._recv_exact(conn, 4)
            if not req:
                return
            _ver, cmd, _rsv, atyp = req
            addr = self._read_addr(conn, atyp)
            if addr is None:
                return
            raw_port = self._recv_exact(conn, 2)
            if raw_port is None:
                return
            port = struct.unpack("!H", raw_port)[0]
            if cmd != 0x01:  # not CONNECT → refuse
                if cmd == 0x03:  # only UDP ASSOCIATE counts towards the counter
                    self.udp_associate_attempts += 1
                conn.sendall(self._REPLY_CMD_NOT_SUPPORTED)
                return
            try:
                upstream = socket.create_connection((addr, port), timeout=15)
            except OSError:
                conn.sendall(self._REPLY_HOST_UNREACHABLE)
                return
            conn.sendall(self._REPLY_SUCCESS)
            self._relay(conn, upstream)
        except Exception:
            # Best-effort: a broken client must never take the server down.
            pass
        finally:
            for sock in (conn, upstream):
                if sock is None:
                    continue
                try:
                    sock.close()
                except Exception:
                    pass

    def _relay(self, a, b):
        """Shuttle bytes between ``a`` and ``b`` until EOF or 30 s of silence; closes both."""
        try:
            while True:
                r, _, _ = select.select([a, b], [], [], 30)
                if not r:
                    break
                for s in r:
                    data = s.recv(65536)
                    if not data:
                        return
                    (b if s is a else a).sendall(data)
        finally:
            for s in (a, b):
                try:
                    s.close()
                except Exception:
                    pass

    def close(self):
        """Stop accepting new clients and close the listening socket."""
        self._stop = True
        try:
            self._srv.close()
        except Exception:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
# The per-event probe CreepJS runs, kept tiny. It is a raw string so the
# JavaScript needs only one level of escaping. The snippet collects every
# `icecandidate` line for 3.5 s and returns them together with the local SDP.
_PROBE_JS = r"""async () => {
const pc = new RTCPeerConnection({iceCandidatePoolSize:1, iceServers:[{urls:[
'stun:stun4.l.google.com:19302','stun:stun3.l.google.com:19302']}]});
pc.createDataChannel('');
const cands = [];
pc.addEventListener('icecandidate', e => { if (e.candidate && e.candidate.candidate) cands.push(e.candidate.candidate); });
await pc.setLocalDescription(await pc.createOffer({offerToReceiveAudio:1, offerToReceiveVideo:1}));
await new Promise(r => setTimeout(r, 3500));
const sdp = (pc.localDescription && pc.localDescription.sdp) || '';
try { pc.close(); } catch(e) {}
return { candidates: cands, sdp };
}"""

# Egress address injected into the browser for the e2e runs (RFC 5737 TEST-NET-3).
_FAKE_EGRESS = "203.0.113.7"
|
||||||
|
|
||||||
|
|
||||||
|
def _e2e_binary():
    """Locate the patched browser binary for the e2e tests.

    Candidates are tried in order and the first path that exists wins:
    ``STEALTHFOX_E2E_BINARY``, then ``INVPW_BINARY_PATH`` (what conftest's
    firefox_binary uses), then the local Windows build output. Honoring both
    env vars lets the whole e2e suite target one binary from a single setting.

    Returns:
        The path of the first existing candidate, or None if none exists.
    """
    candidates_in_order = (
        os.environ.get("STEALTHFOX_E2E_BINARY"),
        os.environ.get("INVPW_BINARY_PATH"),
        r"C:\ff\source\obj-x86_64-pc-windows-msvc\dist\bin\firefox.exe",
    )
    for path in candidates_in_order:
        # A set-but-stale variable must not mask a valid later candidate.
        if path and os.path.exists(path):
            return path
    return None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def socks5_tcp_only():
    """Yield a running ``_Socks5TcpOnly`` proxy and close it on teardown."""
    proxy = _Socks5TcpOnly()
    try:
        yield proxy
    finally:
        proxy.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def local_https_page():
    """A trivial localhost page (used by the no-proxy srflx test).

    Despite the fixture name the page is served over plain HTTP. Yields the
    page URL on an ephemeral 127.0.0.1 port; on teardown the server loop is
    stopped and its listening socket is released.
    """
    class H(BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_response(200)
            self.send_header("Content-Type", "text/html")
            self.end_headers()
            self.wfile.write(b"<html><body>wrtc</body></html>")

        def log_message(self, *a):
            # Silence the default per-request stderr logging.
            pass

    httpd = HTTPServer(("127.0.0.1", 0), H)
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        yield f"http://127.0.0.1:{httpd.server_address[1]}/"
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() frees the socket.
        httpd.server_close()
|
||||||
|
|
||||||
|
|
||||||
|
def _launch(**extra):
    """Build an ``InvisiblePlaywright`` with the e2e defaults, overridable via ``extra``.

    Keyword arguments in ``extra`` replace the default of the same name
    (top-level keys only).
    """
    from invisible_playwright import InvisiblePlaywright

    defaults = dict(
        headless=True,
        # Fixed zone so the wrapper does NOT run timezone="auto" egress
        # discovery through the (fake) proxy — irrelevant here, we inject the
        # egress IP directly and want the launch deterministic/offline.
        timezone="America/New_York",
        extra_prefs={"media.peerconnection.ice.obfuscate_host_addresses": True},
    )
    return InvisiblePlaywright(**{**defaults, **extra})
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_srflx_is_real_and_resolvable(local_https_page):
    """No proxy needed: the egress is faked via the env. Asserts the live srflx
    is genuine (Fix A/B) and that CreepJS's resolver returns it (not blocked).

    Skipped when no patched binary is available.
    """
    from unittest import mock

    binary = _e2e_binary()
    if not binary:
        pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)")
    fake_env = {
        "STEALTHFOX_WEBRTC_PUBLIC_IP": _FAKE_EGRESS,
        "STEALTHFOX_WEBRTC_DISABLE_IPV6": "1",
    }
    # Scope the overrides to this launch so they do not leak into other tests
    # running in the same process.
    with mock.patch.dict(os.environ, fake_env):
        with _launch(binary_path=binary) as browser:
            page = browser.new_context().new_page()
            page.goto(local_https_page, wait_until="domcontentloaded", timeout=60000)
            res = page.evaluate(_PROBE_JS)
    cands = candidates(res["candidates"])
    assert cands, "ICE produced ZERO candidates (blocked)"
    assert host_is_mdns(cands), [c["address"] for c in host_candidates(cands)]
    srflx = [c for c in srflx_candidates(cands) if c["address"] == _FAKE_EGRESS]
    assert srflx, f"no synthetic srflx with {_FAKE_EGRESS}: {res['candidates']}"
    ok, reasons = srflx_realness(srflx[0], expected_ip=_FAKE_EGRESS)
    assert ok, reasons
    # Two srflx for the same base must share ONE stable foundation (Fix B).
    assert len({c["foundation"] for c in srflx}) == 1
    assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
def test_not_blocked_behind_tcp_only_socks(socks5_tcp_only):
    """Fix C sentinel: behind a TCP-only SOCKS proxy on a remote origin, ICE
    must still complete (host .local + synthetic srflx), not return zero
    candidates. Without Fix C this page is fully 'blocked'.

    Skipped when no patched binary is available, when the proxy/network path
    cannot be used, or when the synthetic srflx does not engage here.
    """
    from unittest import mock

    binary = _e2e_binary()
    if not binary:
        pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)")
    fake_env = {
        "STEALTHFOX_WEBRTC_PUBLIC_IP": _FAKE_EGRESS,
        "STEALTHFOX_WEBRTC_DISABLE_IPV6": "1",
    }
    proxy = {"server": f"socks5://127.0.0.1:{socks5_tcp_only.port}"}
    # Scope the overrides to this launch so they do not leak into other tests
    # running in the same process.
    with mock.patch.dict(os.environ, fake_env):
        try:
            with _launch(binary_path=binary, proxy=proxy) as browser:
                page = browser.new_context().new_page()
                # remote origin loaded THROUGH the local SOCKS proxy (not localhost,
                # so no proxy-bypass) → WebRTC proxy config active → Fix C path.
                page.goto("https://example.com/", wait_until="domcontentloaded", timeout=70000)
                res = page.evaluate(_PROBE_JS)
        except Exception as exc:  # network/proxy unavailable in this environment
            pytest.skip(f"proxy/network path unavailable: {exc!r}")
    cands = candidates(res["candidates"])
    # Hard regression check: ZERO candidates means WebRTC is fully blocked behind
    # the SOCKS proxy — that's the Fix C regression this sentinel exists to catch.
    assert cands, "behind SOCKS the gather returned ZERO candidates — Fix C regressed (blocked)"
    assert host_is_mdns(cands)
    # The synthetic srflx (= fake egress) needs the remote origin to load FULLY
    # through the proxy so the WebRTC proxy config engages. That path is
    # environment-sensitive (it doesn't always engage on a datacenter CI box even
    # though host candidates gather), so treat a missing srflx as a skip, not a
    # failure — the local run validates it where the path is real.
    if not any(c["address"] == _FAKE_EGRESS for c in srflx_candidates(cands)):
        pytest.skip("synthetic srflx not engaged in this environment "
                    "(needs the remote origin fully through the proxy); validated locally")
    assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS
|
||||||
125
tests/unit/test_config_public.py
Normal file
125
tests/unit/test_config_public.py
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
"""Unit tests for the public ``config`` helpers."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from invisible_playwright import (
|
||||||
|
ensure_binary,
|
||||||
|
get_default_args,
|
||||||
|
get_default_stealth_prefs,
|
||||||
|
)
|
||||||
|
from invisible_playwright.config import get_default_stealth_prefs as _direct
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_default_args_is_empty_list():
    """No baseline CLI args today; the result must be a fresh, mutable list per call."""
    first = get_default_args()
    assert isinstance(first, list)
    assert first == []
    # Mutating one result must not bleed into the next call.
    first.append("--foo")
    second = get_default_args()
    assert second == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_default_stealth_prefs_random_seed_returns_dict():
    """Without a seed the helper still returns a non-empty dict with the humanize toggle on."""
    humanize_key = "invisible_playwright.humanize"
    generated = get_default_stealth_prefs()
    assert isinstance(generated, dict)
    assert len(generated) > 0
    # humanize toggle is always set explicitly
    assert humanize_key in generated
    assert generated[humanize_key] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_default_stealth_prefs_seed_is_deterministic():
    """Two calls with the same seed produce equal prefs."""
    first = get_default_stealth_prefs(seed=42)
    second = get_default_stealth_prefs(seed=42)
    assert first == second
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_default_stealth_prefs_different_seeds_differ():
    """Distinct seeds must not produce equal prefs."""
    from_seed_one = get_default_stealth_prefs(seed=1)
    from_seed_two = get_default_stealth_prefs(seed=2)
    assert from_seed_one != from_seed_two
|
||||||
|
|
||||||
|
|
||||||
|
def test_humanize_false_disables_prefs():
    """humanize=False turns the toggle off and omits the maxTime knob."""
    generated = get_default_stealth_prefs(seed=42, humanize=False)
    assert generated["invisible_playwright.humanize"] is False
    assert "invisible_playwright.humanize.maxTime" not in generated
|
||||||
|
|
||||||
|
|
||||||
|
def test_humanize_default_sets_max_time_1_5():
    """humanize=True keeps the toggle on and stores maxTime as the string "1.5"."""
    generated = get_default_stealth_prefs(seed=42, humanize=True)
    assert generated["invisible_playwright.humanize"] is True
    assert generated["invisible_playwright.humanize.maxTime"] == "1.5"
|
||||||
|
|
||||||
|
|
||||||
|
def test_humanize_float_overrides_max_time():
    """A float passed as humanize becomes the maxTime cap, stored as a string."""
    generated = get_default_stealth_prefs(seed=42, humanize=3.0)
    assert generated["invisible_playwright.humanize"] is True
    assert generated["invisible_playwright.humanize.maxTime"] == "3.0"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_prefs_overlay_takes_precedence():
    """A key supplied through extra_prefs appears in the result with its value."""
    overlay = {"some.custom.pref": 999}
    generated = get_default_stealth_prefs(seed=42, extra_prefs=overlay)
    assert generated["some.custom.pref"] == 999
|
||||||
|
|
||||||
|
|
||||||
|
def test_extra_prefs_can_override_baseline():
    """An extra_prefs key that also exists in the baseline replaces the baseline value."""
    marker = "OVERRIDDEN_SENTINEL"
    baseline = get_default_stealth_prefs(seed=42)
    # Any baseline key will do; take the first one the dict yields.
    target_key = next(iter(baseline.keys()))
    overlaid = get_default_stealth_prefs(seed=42, extra_prefs={target_key: marker})
    assert overlaid[target_key] == marker
|
||||||
|
|
||||||
|
|
||||||
|
def test_locale_argument_changes_prefs():
    """With the seed fixed, changing the locale changes the resulting prefs."""
    english = get_default_stealth_prefs(seed=42, locale="en-US")
    italian = get_default_stealth_prefs(seed=42, locale="it-IT")
    assert english != italian
|
||||||
|
|
||||||
|
|
||||||
|
def test_timezone_argument_changes_prefs():
    """With the seed fixed, changing the timezone changes the resulting prefs."""
    new_york = get_default_stealth_prefs(seed=42, timezone="America/New_York")
    rome = get_default_stealth_prefs(seed=42, timezone="Europe/Rome")
    assert new_york != rome
|
||||||
|
|
||||||
|
|
||||||
|
def test_pin_argument_forces_specific_fields():
    """Pinning a field makes the result differ from the unpinned build of the same seed."""
    unpinned = get_default_stealth_prefs(seed=42)
    with_pin = get_default_stealth_prefs(seed=42, pin={"hardware.concurrency": 999})
    # something in the dict must differ vs the plain seed=42 build
    assert unpinned != with_pin
|
||||||
|
|
||||||
|
|
||||||
|
def test_public_import_matches_direct_import():
    """The package-root re-export and the ``config`` module function give equal output."""
    via_package_root = get_default_stealth_prefs(seed=42)
    via_config_module = _direct(seed=42)
    assert via_package_root == via_config_module
|
||||||
|
|
||||||
|
|
||||||
|
def test_ensure_binary_is_callable_via_public_namespace():
    """The package-root ``ensure_binary`` is the very object defined in ``download``."""
    # Calling it would trigger a network download in CI, so only the identity
    # of the re-exported attribute is checked.
    from invisible_playwright.download import ensure_binary as underlying

    assert ensure_binary is underlying
|
||||||
18
tests/vendor/README.md
vendored
Normal file
18
tests/vendor/README.md
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Vendored detection libraries (test-only)
|
||||||
|
|
||||||
|
These are upstream, unmodified, MIT-licensed browser-fingerprinting / bot-detection
|
||||||
|
libraries, vendored so the detector e2e tests run **hermetically and identically**
|
||||||
|
on a dev box and on a GitHub runner (no external CDN at test time — Firefox
|
||||||
|
tracking-protection blocks the openfpcdn.io CDN anyway, and we want CI offline).
|
||||||
|
|
||||||
|
They are served from a localhost HTTP server and loaded into the patched Firefox;
|
||||||
|
the tests assert the REAL detectors don't flag the stealth build (BotD: `bot===false`)
|
||||||
|
and that the fingerprint is stable (FingerprintJS: same `visitorId` across launches).
|
||||||
|
|
||||||
|
| File | Package | Version | Source | License |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| `botd-2.0.0.esm.js` | `@fingerprintjs/botd` | 2.0.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/botd@2.0.0/dist/botd.esm.js | MIT |
|
||||||
|
| `fingerprintjs-5.2.0.umd.min.js` | `@fingerprintjs/fingerprintjs` | 5.2.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/fingerprintjs@5.2.0/dist/fp.umd.min.js | MIT |
|
||||||
|
|
||||||
|
Both are MIT (Copyright © FingerprintJS, Inc.). To update: download the pinned
|
||||||
|
dist from jsdelivr, drop it here, and bump the version in the filename + this table.
|
||||||
811
tests/vendor/botd-2.0.0.esm.js
vendored
Normal file
811
tests/vendor/botd-2.0.0.esm.js
vendored
Normal file
|
|
@ -0,0 +1,811 @@
|
||||||
|
/**
|
||||||
|
* Fingerprint BotD v2.0.0 - Copyright (c) FingerprintJS, Inc, 2025 (https://fingerprint.com)
|
||||||
|
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Library version string.
const version = '2.0.0';
|
||||||
|
|
||||||
|
/**
 * Names of the bot kinds a detector can report.
 * Specific bots are listed first, automation technologies after them, and
 * `Unknown` last.
 *
 * A plain object is used instead of a TypeScript enum so that no IIFE is
 * emitted which further tree-shaking might affect
 * (compiled enum example: https://stackoverflow.com/q/47363996).
 *
 * @readonly
 * @enum {string}
 */
const BotKind = {
    // Specific bots and scraping tools.
    Awesomium: 'awesomium',
    Cef: 'cef',
    CefSharp: 'cefsharp',
    CoachJS: 'coachjs',
    Electron: 'electron',
    FMiner: 'fminer',
    Geb: 'geb',
    NightmareJS: 'nightmarejs',
    Phantomas: 'phantomas',
    PhantomJS: 'phantomjs',
    Rhino: 'rhino',
    Selenium: 'selenium',
    Sequentum: 'sequentum',
    SlimerJS: 'slimerjs',
    // Automation technologies.
    WebDriverIO: 'webdriverio',
    WebDriver: 'webdriver',
    HeadlessChrome: 'headless_chrome',
    // Fallback when a detector fires without naming a kind.
    Unknown: 'unknown',
};
|
||||||
|
/**
 * Error raised by a signal source; `state` carries the numeric state code
 * that `collect` copies into the component record.
 */
class BotdError extends Error {
    /**
     * @class
     * @param {number} state State code describing why the source failed.
     * @param {string} message Human-readable description.
     */
    constructor(state, message) {
        super(message);
        Object.assign(this, { state, name: 'BotdError' });
        // Re-pin the prototype so `instanceof BotdError` holds after construction.
        Object.setPrototypeOf(this, BotdError.prototype);
    }
}
|
||||||
|
|
||||||
|
/**
 * Runs every detector against the collected components.
 *
 * A detector returning a string marks a bot of that kind; any other truthy
 * value marks a bot of kind `BotKind.Unknown`; a falsy value means no bot.
 *
 * @param {object} components Collected signal components.
 * @param {object} detectors Map of detector name to detector function.
 * @returns {Array} `[detections, finalDetection]`: the per-detector results
 *   and the last positive detection (or `{ bot: false }` if there was none).
 */
function detect(components, detectors) {
    const detections = {};
    let finalDetection = { bot: false };
    for (const name in detectors) {
        const run = detectors[name];
        const outcome = run(components);
        let verdict;
        if (typeof outcome === 'string') {
            verdict = { bot: true, botKind: outcome };
        }
        else if (outcome) {
            verdict = { bot: true, botKind: BotKind.Unknown };
        }
        else {
            verdict = { bot: false };
        }
        detections[name] = verdict;
        // Later positives overwrite earlier ones: the last one wins.
        if (verdict.bot) {
            finalDetection = verdict;
        }
    }
    return [detections, finalDetection];
}
|
||||||
|
/**
 * Calls every source concurrently and records one component per source.
 *
 * A source that resolves yields `{ value, state: 0 }`. A source that throws a
 * `BotdError` yields that error's state; any other failure yields state -3.
 * Both failure records carry a textual `error`.
 *
 * @param {object} sources Map of source name to (possibly async) function.
 * @returns {Promise<object>} Map of source name to component record.
 */
async function collect(sources) {
    const components = {};
    // Turns a thrown value into the component record for a failed source.
    const describeFailure = (error) => {
        if (error instanceof BotdError) {
            return {
                state: error.state,
                error: `${error.name}: ${error.message}`,
            };
        }
        return {
            state: -3 /* State.UnexpectedBehaviour */,
            error: error instanceof Error ? `${error.name}: ${error.message}` : String(error),
        };
    };
    const names = Object.keys(sources);
    await Promise.all(names.map(async (name) => {
        const source = sources[name];
        try {
            components[name] = {
                value: await source(),
                state: 0 /* State.Success */,
            };
        }
        catch (error) {
            components[name] = describeFailure(error);
        }
    }));
    return components;
}
|
||||||
|
|
||||||
|
/**
 * Flags app-version strings that name a known automation runtime.
 *
 * @returns {string|false|undefined} The matching bot kind, `false` when the
 *   component was not collected, `undefined` when nothing matches.
 */
function detectAppVersion({ appVersion }) {
    if (appVersion.state !== 0 /* State.Success */) {
        return false;
    }
    // Checked in order; the first matching pattern decides the kind.
    const rules = [
        [/headless/i, BotKind.HeadlessChrome],
        [/electron/i, BotKind.Electron],
        [/slimerjs/i, BotKind.SlimerJS],
    ];
    for (const [pattern, kind] of rules) {
        if (pattern.test(appVersion.value)) {
            return kind;
        }
    }
}
|
||||||
|
|
||||||
|
/** Tells whether `arr.indexOf(value)` finds `value`. */
function arrayIncludes(arr, value) {
    const position = arr.indexOf(value);
    return position !== -1;
}
|
||||||
|
/** Tells whether `str.indexOf(value)` finds `value`. */
function strIncludes(str, value) {
    const position = str.indexOf(value);
    return position !== -1;
}
|
||||||
|
/**
 * Returns the first element for which `callback(element, index, array)` is
 * truthy, or `undefined`. Uses the array's own `find` when it has one and
 * falls back to a manual scan otherwise.
 */
function arrayFind(array, callback) {
    if ('find' in array) {
        return array.find(callback);
    }
    let index = 0;
    while (index < array.length) {
        const candidate = array[index];
        if (callback(candidate, index, array)) {
            return array[index];
        }
        index += 1;
    }
    return undefined;
}
|
||||||
|
|
||||||
|
/** Lists the own property names of `obj`, as `Object.getOwnPropertyNames` reports them. */
function getObjectProps(obj) {
    const ownNames = Object.getOwnPropertyNames(obj);
    return ownNames;
}
|
||||||
|
/**
 * Tells whether `arr` contains any of `keys`.
 *
 * A string key must equal an element (via `arrayIncludes`); any other key is
 * treated as a pattern whose `test` is run against each element.
 */
function includes(arr, ...keys) {
    return keys.some((key) => {
        if (typeof key === 'string') {
            return arrayIncludes(arr, key);
        }
        const hit = arrayFind(arr, (value) => key.test(value));
        return hit != null;
    });
}
|
||||||
|
/** Counts how many entries of `values` are truthy. */
function countTruthy(values) {
    let total = 0;
    for (const value of values) {
        if (value) {
            total += 1;
        }
    }
    return total;
}
|
||||||
|
|
||||||
|
/**
 * Flags documents whose collected root attribute names include
 * 'selenium', 'webdriver' or 'driver'.
 *
 * @returns {string|false|undefined} `BotKind.Selenium` on a hit, `false` when
 *   the component was not collected, `undefined` otherwise.
 */
function detectDocumentAttributes({ documentElementKeys }) {
    if (documentElementKeys.state !== 0 /* State.Success */) {
        return false;
    }
    const attributeNames = documentElementKeys.value;
    const tainted = includes(attributeNames, 'selenium', 'webdriver', 'driver');
    if (tainted) {
        return BotKind.Selenium;
    }
}
|
||||||
|
|
||||||
|
/**
 * Flags stack traces that mention PhantomJS (case-insensitive).
 *
 * @returns {string|false|undefined} `BotKind.PhantomJS` on a hit, `false` when
 *   the component was not collected, `undefined` otherwise.
 */
function detectErrorTrace({ errorTrace }) {
    if (errorTrace.state !== 0 /* State.Success */) {
        return false;
    }
    const mentionsPhantom = /PhantomJS/i.test(errorTrace.value);
    if (mentionsPhantom) {
        return BotKind.PhantomJS;
    }
}
|
||||||
|
|
||||||
|
/**
 * Flags a mismatch between the length of `eval.toString()` and the detected
 * browser / engine: 37 belongs to WebKit or Gecko, 39 to Internet Explorer,
 * 33 to Chromium.
 *
 * @returns {boolean|undefined} `true` on a mismatch, `false` when consistent
 *   or the engine is unknown, `undefined` when any component is missing.
 */
function detectEvalLengthInconsistency({ evalLength, browserKind, browserEngineKind }) {
    const allCollected = [evalLength, browserKind, browserEngineKind]
        .every((component) => component.state === 0 /* State.Success */);
    if (!allCollected) {
        return;
    }
    const length = evalLength.value;
    const engine = browserEngineKind.value;
    if (engine === "unknown" /* BrowserEngineKind.Unknown */) {
        return false;
    }
    // The three lengths are mutually exclusive, so at most one branch applies.
    if (length === 37) {
        return !arrayIncludes(["webkit" /* BrowserEngineKind.Webkit */, "gecko" /* BrowserEngineKind.Gecko */], engine);
    }
    if (length === 39) {
        return !arrayIncludes(["internet_explorer" /* BrowserKind.IE */], browserKind.value);
    }
    if (length === 33) {
        return !arrayIncludes(["chromium" /* BrowserEngineKind.Chromium */], engine);
    }
    return false;
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.PhantomJS` when the `functionBind` component failed with
 * the "not a function" state; returns `undefined` otherwise.
 */
function detectFunctionBind({ functionBind }) {
    const bindMissing = functionBind.state === -2 /* State.NotFunction */;
    if (bindMissing) {
        return BotKind.PhantomJS;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.HeadlessChrome` when the languages component was collected
 * but is empty; returns `undefined` otherwise.
 */
function detectLanguagesLengthInconsistency({ languages }) {
    if (languages.state !== 0 /* State.Success */) {
        return;
    }
    if (languages.value.length === 0) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.Unknown` when the mime-type consistency check was collected
 * and came back falsy; returns `undefined` otherwise.
 */
function detectMimeTypesConsistent({ mimeTypesConsistent }) {
    if (mimeTypesConsistent.state !== 0 /* State.Success */) {
        return;
    }
    if (!mimeTypesConsistent.value) {
        return BotKind.Unknown;
    }
}
|
||||||
|
|
||||||
|
/**
 * Chrome-only check: reports `BotKind.HeadlessChrome` when the collected
 * notification-permissions signal is truthy.
 *
 * @returns {string|false|undefined} The bot kind on a hit, `false` when the
 *   browser is not a successfully detected Chrome, `undefined` otherwise.
 */
function detectNotificationPermissions({ notificationPermissions, browserKind }) {
    const isChrome = browserKind.state === 0 /* State.Success */ &&
        browserKind.value === "chrome" /* BrowserKind.Chrome */;
    if (!isChrome) {
        return false;
    }
    const collected = notificationPermissions.state === 0 /* State.Success */;
    if (collected && notificationPermissions.value) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.HeadlessChrome` when the plugins-array signal was collected
 * and is falsy; returns `undefined` otherwise.
 */
function detectPluginsArray({ pluginsArray }) {
    const collected = pluginsArray.state === 0 /* State.Success */;
    if (collected && !pluginsArray.value) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.HeadlessChrome` for a non-Android Chrome on the Chromium
 * engine that exposes zero plugins. Returns `undefined` in every other case,
 * including when any required component was not collected.
 */
function detectPluginsLengthInconsistency({ pluginsLength, android, browserKind, browserEngineKind }) {
    const allCollected = [pluginsLength, android, browserKind, browserEngineKind]
        .every((component) => component.state === 0 /* State.Success */);
    if (!allCollected) {
        return;
    }
    const isDesktopChrome = browserKind.value === "chrome" /* BrowserKind.Chrome */ &&
        !android.value &&
        browserEngineKind.value === "chromium" /* BrowserEngineKind.Chromium */;
    if (!isDesktopChrome) {
        return;
    }
    if (pluginsLength.value === 0) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.Electron` when the collected `process` object has type
 * 'renderer' or a non-null `versions.electron`.
 *
 * @returns {string|false|undefined} The bot kind on a hit, `false` when the
 *   component was not collected, `undefined` otherwise.
 */
function detectProcess({ process }) {
    if (process.state !== 0 /* State.Success */) {
        return false;
    }
    if (process.value.type === 'renderer') {
        return BotKind.Electron;
    }
    const versions = process.value.versions;
    const electronVersion = versions === null || versions === undefined ? undefined : versions.electron;
    if (electronVersion != null) {
        return BotKind.Electron;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.Unknown` when a Chrome, Safari, Opera or WeChat browser
 * exposes a `productSub` other than '20030107'.
 *
 * @returns {string|false|undefined} The bot kind on a hit, `false` when a
 *   component was not collected, `undefined` otherwise.
 */
function detectProductSub({ productSub, browserKind }) {
    const bothCollected = productSub.state === 0 /* State.Success */ &&
        browserKind.state === 0 /* State.Success */;
    if (!bothCollected) {
        return false;
    }
    let expectsFixedProductSub;
    switch (browserKind.value) {
        case "chrome" /* BrowserKind.Chrome */:
        case "safari" /* BrowserKind.Safari */:
        case "opera" /* BrowserKind.Opera */:
        case "wechat" /* BrowserKind.WeChat */:
            expectsFixedProductSub = true;
            break;
        default:
            expectsFixedProductSub = false;
    }
    if (expectsFixedProductSub && productSub.value !== '20030107') {
        return BotKind.Unknown;
    }
}
|
||||||
|
|
||||||
|
/**
 * Flags user-agent strings that name a known automation runtime.
 *
 * @returns {string|false|undefined} The matching bot kind, `false` when the
 *   component was not collected, `undefined` when nothing matches.
 */
function detectUserAgent({ userAgent }) {
    if (userAgent.state !== 0 /* State.Success */) {
        return false;
    }
    // Checked in order; the first matching pattern decides the kind.
    const rules = [
        [/PhantomJS/i, BotKind.PhantomJS],
        [/Headless/i, BotKind.HeadlessChrome],
        [/Electron/i, BotKind.Electron],
        [/slimerjs/i, BotKind.SlimerJS],
    ];
    for (const [pattern, kind] of rules) {
        if (pattern.test(userAgent.value)) {
            return kind;
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.HeadlessChrome` when the webdriver signal was collected
 * and is truthy; returns `undefined` otherwise.
 */
function detectWebDriver({ webDriver }) {
    const collected = webDriver.state === 0 /* State.Success */;
    if (collected && webDriver.value) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.HeadlessChrome` when the collected WebGL vendor/renderer
 * pair is 'Brian Paul' / 'Mesa OffScreen'; returns `undefined` otherwise.
 */
function detectWebGL({ webGL }) {
    if (webGL.state !== 0 /* State.Success */) {
        return;
    }
    const { vendor, renderer } = webGL.value;
    // Loose equality is kept on purpose to match the upstream comparison.
    const isMesaOffscreen = vendor == 'Brian Paul' && renderer == 'Mesa OffScreen';
    if (isMesaOffscreen) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Flags `window.external` descriptions that mention Sequentum (case-insensitive).
 *
 * @returns {string|false|undefined} `BotKind.Sequentum` on a hit, `false` when
 *   the component was not collected, `undefined` otherwise.
 */
function detectWindowExternal({ windowExternal }) {
    if (windowExternal.state !== 0 /* State.Success */) {
        return false;
    }
    const mentionsSequentum = /Sequentum/i.test(windowExternal.value);
    if (mentionsSequentum) {
        return BotKind.Sequentum;
    }
}
|
||||||
|
|
||||||
|
/**
 * Reports `BotKind.HeadlessChrome` when a focused document lives in a window
 * whose outer size is 0x0.
 *
 * @returns {string|false|undefined} The bot kind on a hit, `false` when a
 *   component was not collected, `undefined` otherwise.
 */
function detectWindowSize({ windowSize, documentFocus }) {
    const bothCollected = windowSize.state === 0 /* State.Success */ &&
        documentFocus.state === 0 /* State.Success */;
    if (!bothCollected) {
        return false;
    }
    const { outerWidth, outerHeight } = windowSize.value;
    // When a page is opened in a new tab without focusing it right away, the
    // window outer size is 0x0, so an unfocused document proves nothing.
    const focused = Boolean(documentFocus.value);
    if (focused && outerWidth === 0 && outerHeight === 0) {
        return BotKind.HeadlessChrome;
    }
}
|
||||||
|
|
||||||
|
/**
 * Returns the first key of the collected `distinctiveProps` map whose value
 * is truthy, in `for...in` order.
 *
 * @returns {string|false|undefined} The key on a hit, `false` when the
 *   component was not collected, `undefined` when no value is truthy.
 */
function detectDistinctiveProperties({ distinctiveProps }) {
    if (distinctiveProps.state !== 0 /* State.Success */) {
        return false;
    }
    const flagsByBot = distinctiveProps.value;
    for (const botName in flagsByBot) {
        if (flagsByBot[botName]) {
            return botName;
        }
    }
}
|
||||||
|
|
||||||
|
// Registry of detector functions keyed by their own names. Key order is kept
// as is: it is the order a `for...in` walk over this object visits them.
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
const detectors = {
    detectAppVersion: detectAppVersion,
    detectDocumentAttributes: detectDocumentAttributes,
    detectErrorTrace: detectErrorTrace,
    detectEvalLengthInconsistency: detectEvalLengthInconsistency,
    detectFunctionBind: detectFunctionBind,
    detectLanguagesLengthInconsistency: detectLanguagesLengthInconsistency,
    detectNotificationPermissions: detectNotificationPermissions,
    detectPluginsArray: detectPluginsArray,
    detectPluginsLengthInconsistency: detectPluginsLengthInconsistency,
    detectProcess: detectProcess,
    detectUserAgent: detectUserAgent,
    detectWebDriver: detectWebDriver,
    detectWebGL: detectWebGL,
    detectWindowExternal: detectWindowExternal,
    detectWindowSize: detectWindowSize,
    detectMimeTypesConsistent: detectMimeTypesConsistent,
    detectProductSub: detectProductSub,
    detectDistinctiveProperties: detectDistinctiveProperties,
};
|
||||||
|
|
||||||
|
/**
 * Reads `navigator.appVersion`.
 *
 * @returns {*} The value of `navigator.appVersion`.
 * @throws {BotdError} With state -1 (State.Undefined) when the value is `null` or `undefined`.
 */
function getAppVersion() {
    const { appVersion } = navigator;
    // Loose comparison on purpose: both `null` and `undefined` count as missing.
    if (appVersion != undefined) {
        return appVersion;
    }
    throw new BotdError(-1 /* State.Undefined */, 'navigator.appVersion is undefined');
}
|
||||||
|
|
||||||
|
/**
 * Returns the attribute names of the document's root element.
 *
 * @returns {string[]} Result of `document.documentElement.getAttributeNames()`.
 * @throws {BotdError} State -1 (State.Undefined) when `document.documentElement` is `undefined`,
 *   state -4 (State.Null) when it is `null`, and state -2 (State.NotFunction) when
 *   `getAttributeNames` is not a function.
 */
function getDocumentElementKeys() {
    const { documentElement } = document;
    if (documentElement === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'document.documentElement is undefined');
    }
    // A document without a root element exposes `null` here; report it as a signal error
    // instead of failing with a TypeError on the property access below.
    if (documentElement === null) {
        throw new BotdError(-4 /* State.Null */, 'document.documentElement is null');
    }
    if (typeof documentElement.getAttributeNames !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'document.documentElement.getAttributeNames is not a function');
    }
    return documentElement.getAttributeNames();
}
|
||||||
|
|
||||||
|
/**
 * Provokes a runtime error and returns its stack trace as a string.
 *
 * @returns {string} `error.stack.toString()` of the provoked error.
 * @throws {BotdError} State -3 (State.UnexpectedBehaviour) when nothing was thrown, the thrown
 *   value is not an `Error`, or its `stack` is `null`/`undefined`.
 */
function getErrorTrace() {
    let thrown;
    try {
        // Deliberately invalid: indexing `null` throws, which yields an engine-generated stack.
        null[0]();
    }
    catch (error) {
        thrown = error;
    }
    const hasStack = thrown instanceof Error && thrown['stack'] != null;
    if (hasStack) {
        return thrown.stack.toString();
    }
    throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'errorTrace signal unexpected behaviour');
}
|
||||||
|
|
||||||
|
/**
 * Measures the length of the source text reported for the global `eval` function.
 *
 * @returns {number} Length of `eval.toString()`.
 */
function getEvalLength() {
    const evalSource = eval.toString();
    return evalSource.length;
}
|
||||||
|
|
||||||
|
/**
 * Returns the source text reported for `Function.prototype.bind`.
 *
 * @returns {string} `Function.prototype.bind.toString()`.
 * @throws {BotdError} State -2 (State.NotFunction) when `Function.prototype.bind` is `undefined`.
 */
function getFunctionBind() {
    const { bind } = Function.prototype;
    if (bind === undefined) {
        throw new BotdError(-2 /* State.NotFunction */, 'Function.prototype.bind is undefined');
    }
    return bind.toString();
}
|
||||||
|
|
||||||
|
/**
 * Guesses the browser engine from the presence of engine-specific globals.
 * Each engine has a list of boolean signals; the first engine whose truthy-signal count
 * reaches its threshold wins (Chromium: 5 of 7, WebKit: 4 of 6, Gecko: 4 of 6).
 *
 * @returns {string} `"chromium"`, `"webkit"`, `"gecko"` or `"unknown"`.
 */
function getBrowserEngineKind() {
    // Based on research in October 2020. Tested to detect Chromium 42-86.
    const win = window;
    const nav = navigator;

    const chromiumSignals = [
        'webkitPersistentStorage' in nav,
        'webkitTemporaryStorage' in nav,
        nav.vendor.indexOf('Google') === 0,
        'webkitResolveLocalFileSystemURL' in win,
        'BatteryManager' in win,
        'webkitMediaStream' in win,
        'webkitSpeechGrammar' in win,
    ];
    if (countTruthy(chromiumSignals) >= 5) {
        return "chromium" /* BrowserEngineKind.Chromium */;
    }

    const webkitSignals = [
        'ApplePayError' in win,
        'CSSPrimitiveValue' in win,
        'Counter' in win,
        nav.vendor.indexOf('Apple') === 0,
        'getStorageUpdates' in nav,
        'WebKitMediaKeys' in win,
    ];
    if (countTruthy(webkitSignals) >= 4) {
        return "webkit" /* BrowserEngineKind.Webkit */;
    }

    // Fall back to an empty object when the root element or its style is null/undefined.
    const rootElement = document.documentElement;
    const rootStyle = rootElement === null || rootElement === undefined ? undefined : rootElement.style;
    const styleOrEmpty = rootStyle !== null && rootStyle !== undefined ? rootStyle : {};
    const geckoSignals = [
        'buildID' in navigator,
        'MozAppearance' in styleOrEmpty,
        'onmozfullscreenchange' in win,
        'mozInnerScreenX' in win,
        'CSSMozDocumentRule' in win,
        'CanvasCaptureMediaStream' in win,
    ];
    if (countTruthy(geckoSignals) >= 4) {
        return "gecko" /* BrowserEngineKind.Gecko */;
    }

    return "unknown" /* BrowserEngineKind.Unknown */;
}
|
||||||
|
/**
 * Classifies the browser by substrings of the lower-cased `navigator.userAgent`.
 * Rules are tried in order and the first match wins, so more specific markers
 * (e.g. `edg/`, `opr`) are tested before `chrome` and `safari`.
 *
 * @returns {string} One of `"edge"`, `"internet_explorer"`, `"wechat"`, `"firefox"`,
 *   `"opera"`, `"chrome"`, `"safari"` or `"unknown"`.
 */
function getBrowserKind() {
    const rawUserAgent = navigator.userAgent;
    // Stays `undefined` when the user agent is null/undefined; it is passed to `strIncludes` as is.
    const userAgent = rawUserAgent === null || rawUserAgent === undefined ? undefined : rawUserAgent.toLowerCase();
    const orderedRules = [
        ["edge" /* BrowserKind.Edge */, ['edg/']],
        ["internet_explorer" /* BrowserKind.IE */, ['trident', 'msie']],
        ["wechat" /* BrowserKind.WeChat */, ['wechat']],
        ["firefox" /* BrowserKind.Firefox */, ['firefox']],
        ["opera" /* BrowserKind.Opera */, ['opera', 'opr']],
        ["chrome" /* BrowserKind.Chrome */, ['chrome']],
        ["safari" /* BrowserKind.Safari */, ['safari']],
    ];
    for (const [kind, markers] of orderedRules) {
        if (markers.some((marker) => strIncludes(userAgent, marker))) {
            return kind;
        }
    }
    return "unknown" /* BrowserKind.Unknown */;
}
|
||||||
|
// Source: https://github.com/fingerprintjs/fingerprintjs/blob/master/src/utils/browser.ts#L223
/**
 * Heuristically decides whether the page runs on Android, using engine-specific signals.
 *
 * @returns {boolean} For Chromium and Gecko engines, whether at least 2 of their 3 signals are truthy;
 *   `false` for every other engine.
 */
function isAndroid() {
    const engine = getBrowserEngineKind();
    const win = window;
    const nav = navigator;

    // Chrome removes all words "Android" from `navigator` when desktop version is requested
    // Firefox keeps "Android" in `navigator.appVersion` when desktop version is requested
    if (engine === "chromium" /* BrowserEngineKind.Chromium */) {
        const chromiumSignals = [
            !('SharedWorker' in win),
            // `typechange` is deprecated, but it's still present on Android (tested on Chrome Mobile 117)
            // Removal proposal https://bugs.chromium.org/p/chromium/issues/detail?id=699892
            // Note: this expression returns true on ChromeOS, so additional detectors are required to avoid false-positives
            nav.connection && 'ontypechange' in nav.connection,
            !('sinkId' in new Audio()),
        ];
        return countTruthy(chromiumSignals) >= 2;
    }

    if (engine === "gecko" /* BrowserEngineKind.Gecko */) {
        const geckoSignals = [
            'onorientationchange' in win,
            'orientation' in win,
            /android/i.test(nav.appVersion),
        ];
        return countTruthy(geckoSignals) >= 2;
    }

    // Only 2 browser engines are presented on Android.
    // Actually, there is also Android 4.1 browser, but it's not worth detecting it at the moment.
    return false;
}
|
||||||
|
/**
 * Reports whether the document currently has focus.
 *
 * @returns {*} `false` when `document.hasFocus` is `undefined`, otherwise the result of `document.hasFocus()`.
 */
function getDocumentFocus() {
    return document.hasFocus === undefined ? false : document.hasFocus();
}
|
||||||
|
/**
 * Heuristic for "Chromium 86 or newer": true when at least 3 of 4 window-level signals hold.
 * The function does not check the engine itself; callers combine it with an engine check.
 *
 * @returns {boolean} Whether the truthy-signal count is at least 3.
 */
function isChromium86OrNewer() {
    // Checked in Chrome 85 vs Chrome 86 both on desktop and Android. Checked in macOS Chrome 128, Android Chrome 127.
    const win = window;
    const signals = [
        !('MediaSettingsRange' in win),
        'RTCEncodedAudioFrame' in win,
        '' + win.Intl === '[object Intl]',
        '' + win.Reflect === '[object Reflect]',
    ];
    const matched = countTruthy(signals);
    return matched >= 3;
}
|
||||||
|
|
||||||
|
/**
 * Collects the language preferences exposed by `navigator` as a list of lists.
 *
 * @returns {Array<Array<string>>} First the single primary language (when one is defined), then the
 *   contents of `navigator.languages` — taken as is when it is an array, or split on `,` when it is
 *   a non-empty string.
 */
function getLanguages() {
    const nav = navigator;
    const collected = [];

    const primary = nav.language || nav.userLanguage || nav.browserLanguage || nav.systemLanguage;
    if (primary !== undefined) {
        collected.push([primary]);
    }

    const listed = nav.languages;
    if (Array.isArray(listed)) {
        // Starting from Chromium 86, there is only a single value in `navigator.languages` in Incognito mode:
        // the value of `navigator.language`. Therefore, the value is ignored in this browser.
        const isModernChromium = getBrowserEngineKind() === "chromium" /* BrowserEngineKind.Chromium */ && isChromium86OrNewer();
        if (!isModernChromium) {
            collected.push(listed);
        }
    }
    else if (typeof listed === 'string' && listed) {
        collected.push(listed.split(','));
    }

    return collected;
}
|
||||||
|
|
||||||
|
/**
 * Checks that `navigator.mimeTypes` and every entry in it have the expected native prototypes.
 *
 * @returns {boolean} `true` only when the collection's prototype is `MimeTypeArray.prototype`
 *   and each indexed entry's prototype is `MimeType.prototype`.
 * @throws {BotdError} State -1 (State.Undefined) when `navigator.mimeTypes` is `undefined`.
 */
function areMimeTypesConsistent() {
    const { mimeTypes } = navigator;
    if (mimeTypes === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.mimeTypes is undefined');
    }
    if (Object.getPrototypeOf(mimeTypes) !== MimeTypeArray.prototype) {
        return false;
    }
    // Stop at the first entry with an unexpected prototype.
    for (let index = 0; index < mimeTypes.length; index++) {
        if (Object.getPrototypeOf(mimeTypes[index]) !== MimeType.prototype) {
            return false;
        }
    }
    return true;
}
|
||||||
|
|
||||||
|
/**
 * Compares `Notification.permission` with the state reported by the Permissions API.
 *
 * @returns {Promise<boolean>} `true` when `Notification.permission` is `'denied'` while
 *   `permissions.query({ name: 'notifications' })` reports `'prompt'`.
 * @throws {BotdError} State -1 (State.Undefined) when `window.Notification` or `navigator.permissions`
 *   is `undefined`; state -2 (State.NotFunction) when `permissions.query` is not a function;
 *   state -3 (State.UnexpectedBehaviour) when the query or the comparison throws.
 */
async function getNotificationPermissions() {
    if (window.Notification === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'window.Notification is undefined');
    }
    const { permissions } = navigator;
    if (permissions === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.permissions is undefined');
    }
    if (typeof permissions.query !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'navigator.permissions.query is not a function');
    }
    try {
        const status = await permissions.query({ name: 'notifications' });
        const isDenied = window.Notification.permission === 'denied';
        return isDenied && status.state === 'prompt';
    }
    catch (queryError) {
        // Any failure inside the query or comparison is reported as one generic signal error.
        throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'notificationPermissions signal unexpected behaviour');
    }
}
|
||||||
|
|
||||||
|
/**
 * Checks whether `navigator.plugins` is an instance of the native `PluginArray`.
 *
 * @returns {boolean} Result of `navigator.plugins instanceof PluginArray`.
 * @throws {BotdError} State -1 (State.Undefined) when `navigator.plugins` or `window.PluginArray` is `undefined`.
 */
function getPluginsArray() {
    const { plugins } = navigator;
    if (plugins === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.plugins is undefined');
    }
    if (window.PluginArray === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'window.PluginArray is undefined');
    }
    return plugins instanceof PluginArray;
}
|
||||||
|
|
||||||
|
/**
 * Reads `navigator.plugins.length`.
 *
 * @returns {*} The value of `navigator.plugins.length`.
 * @throws {BotdError} State -1 (State.Undefined) when `navigator.plugins` is `undefined`;
 *   state -3 (State.UnexpectedBehaviour) when its `length` is `undefined`.
 */
function getPluginsLength() {
    const { plugins } = navigator;
    if (plugins === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.plugins is undefined');
    }
    const { length } = plugins;
    if (length === undefined) {
        throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'navigator.plugins.length is undefined');
    }
    return length;
}
|
||||||
|
|
||||||
|
/**
 * Reads `window.process`.
 *
 * @returns {*} The value of `window.process` (an object, or a falsy value other than `undefined`).
 * @throws {BotdError} State -1 (State.Undefined) when `window.process` is `undefined`;
 *   state -3 (State.UnexpectedBehaviour) when it is truthy but not of type `object`.
 */
function getProcess() {
    const messagePrefix = 'window.process is';
    const candidate = window.process;
    if (candidate === undefined) {
        throw new BotdError(-1 /* State.Undefined */, `${messagePrefix} undefined`);
    }
    const isTruthyNonObject = Boolean(candidate) && typeof candidate !== 'object';
    if (isTruthyNonObject) {
        throw new BotdError(-3 /* State.UnexpectedBehaviour */, `${messagePrefix} not an object`);
    }
    return candidate;
}
|
||||||
|
|
||||||
|
/**
 * Reads `navigator.productSub`.
 *
 * @returns {*} The value of `navigator.productSub`.
 * @throws {BotdError} State -1 (State.Undefined) when the value is `undefined`.
 */
function getProductSub() {
    const value = navigator.productSub;
    if (value !== undefined) {
        return value;
    }
    throw new BotdError(-1 /* State.Undefined */, 'navigator.productSub is undefined');
}
|
||||||
|
|
||||||
|
/**
 * Reads `navigator.connection.rtt`.
 *
 * @returns {*} The value of `navigator.connection.rtt`.
 * @throws {BotdError} State -1 (State.Undefined) when `navigator.connection` or its `rtt` is `undefined`.
 */
function getRTT() {
    const { connection } = navigator;
    if (connection === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.connection is undefined');
    }
    const { rtt } = connection;
    if (rtt === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'navigator.connection.rtt is undefined');
    }
    return rtt;
}
|
||||||
|
|
||||||
|
/**
 * Reads `navigator.userAgent` without any validation.
 *
 * @returns {*} The value of `navigator.userAgent`.
 */
function getUserAgent() {
    const { userAgent } = navigator;
    return userAgent;
}
|
||||||
|
|
||||||
|
/**
 * Reads `navigator.webdriver`.
 *
 * @returns {*} The value of `navigator.webdriver`.
 * @throws {BotdError} State -1 (State.Undefined) when the value is `null` or `undefined`.
 */
function getWebDriver() {
    const { webdriver } = navigator;
    // Loose comparison on purpose: both `null` and `undefined` count as missing.
    if (webdriver != undefined) {
        return webdriver;
    }
    throw new BotdError(-1 /* State.Undefined */, 'navigator.webdriver is undefined');
}
|
||||||
|
|
||||||
|
/**
 * Queries the WebGL vendor and renderer strings through a freshly created canvas.
 *
 * @returns {{vendor: *, renderer: *}} Values of the context's `VENDOR` and `RENDERER` parameters.
 * @throws {BotdError} State -2 (State.NotFunction) when `getContext` or `getParameter` is not a function;
 *   state -4 (State.Null) when `getContext('webgl')` returns `null`.
 */
function getWebGL() {
    const canvas = document.createElement('canvas');
    if (typeof canvas.getContext !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'HTMLCanvasElement.getContext is not a function');
    }
    const gl = canvas.getContext('webgl');
    if (gl === null) {
        throw new BotdError(-4 /* State.Null */, 'WebGLRenderingContext is null');
    }
    if (typeof gl.getParameter !== 'function') {
        throw new BotdError(-2 /* State.NotFunction */, 'WebGLRenderingContext.getParameter is not a function');
    }
    const vendor = gl.getParameter(gl.VENDOR);
    const renderer = gl.getParameter(gl.RENDERER);
    return { vendor, renderer };
}
|
||||||
|
|
||||||
|
/**
 * Returns the string form of `window.external`.
 *
 * @returns {*} Result of `window.external.toString()`.
 * @throws {BotdError} State -1 (State.Undefined) when `window.external` is `undefined`;
 *   state -2 (State.NotFunction) when its `toString` is not a function.
 */
function getWindowExternal() {
    const externalObject = window.external;
    if (externalObject === undefined) {
        throw new BotdError(-1 /* State.Undefined */, 'window.external is undefined');
    }
    const canStringify = typeof externalObject.toString === 'function';
    if (!canStringify) {
        throw new BotdError(-2 /* State.NotFunction */, 'window.external.toString is not a function');
    }
    return externalObject.toString();
}
|
||||||
|
|
||||||
|
/**
 * Snapshots the window's outer and inner dimensions.
 *
 * @returns {{outerWidth: *, outerHeight: *, innerWidth: *, innerHeight: *}} The four values read from `window`.
 */
function getWindowSize() {
    const { outerWidth, outerHeight, innerWidth, innerHeight } = window;
    return { outerWidth, outerHeight, innerWidth, innerHeight };
}
|
||||||
|
|
||||||
|
/**
 * Looks for property names on `window` and `window.document` that are listed per bot kind.
 *
 * @returns {Object<string, *>} One entry per bot kind in the table below: the result of matching
 *   its `window` names against the window's properties, or (when that is falsy) its `document`
 *   names against the document's properties. Matching is delegated to `includes`, and property
 *   listing to `getObjectProps`.
 */
function checkDistinctiveProperties() {
    // The order in the following list matters, because specific types of bots come first, followed by automation technologies.
    const distinctivePropsList = {
        [BotKind.Awesomium]: { window: ['awesomium'] },
        [BotKind.Cef]: { window: ['RunPerfTest'] },
        [BotKind.CefSharp]: { window: ['CefSharp'] },
        [BotKind.CoachJS]: { window: ['emit'] },
        [BotKind.FMiner]: { window: ['fmget_targets'] },
        [BotKind.Geb]: { window: ['geb'] },
        [BotKind.NightmareJS]: { window: ['__nightmare', 'nightmare'] },
        [BotKind.Phantomas]: { window: ['__phantomas'] },
        [BotKind.PhantomJS]: { window: ['callPhantom', '_phantom'] },
        [BotKind.Rhino]: { window: ['spawn'] },
        [BotKind.Selenium]: {
            window: ['_Selenium_IDE_Recorder', '_selenium', 'calledSelenium', /^([a-z]){3}_.*_(Array|Promise|Symbol)$/],
            document: ['__selenium_evaluate', 'selenium-evaluate', '__selenium_unwrapped'],
        },
        [BotKind.WebDriverIO]: { window: ['wdioElectron'] },
        [BotKind.WebDriver]: {
            window: [
                'webdriver',
                '__webdriverFunc',
                '__lastWatirAlert',
                '__lastWatirConfirm',
                '__lastWatirPrompt',
                '_WEBDRIVER_ELEM_CACHE',
                'ChromeDriverw',
            ],
            document: [
                '__webdriver_script_fn',
                '__driver_evaluate',
                '__webdriver_evaluate',
                '__fxdriver_evaluate',
                '__driver_unwrapped',
                '__webdriver_unwrapped',
                '__fxdriver_unwrapped',
                '__webdriver_script_fn',
                '__webdriver_script_func',
                '__webdriver_script_function',
                '$cdc_asdjflasutopfhvcZLmcf',
                '$cdc_asdjflasutopfhvcZLmcfl_',
                '$chrome_asyncScriptInfo',
                '__$webdriverAsyncExecutor',
            ],
        },
        [BotKind.HeadlessChrome]: { window: ['domAutomation', 'domAutomationController'] },
    };

    const windowProps = getObjectProps(window);
    // The document list stays empty when `window.document` is undefined, which disables document checks.
    const documentProps = window.document !== undefined ? getObjectProps(window.document) : [];

    const result = {};
    for (const botName in distinctivePropsList) {
        const props = distinctivePropsList[botName];
        if (props === undefined) {
            continue;
        }
        const { window: windowNames, document: documentNames } = props;
        const inWindow = windowNames !== undefined && includes(windowProps, ...windowNames);
        const inDocument = documentNames !== undefined && documentProps.length > 0 && includes(documentProps, ...documentNames);
        result[botName] = inWindow || inDocument;
    }
    return result;
}
|
||||||
|
|
||||||
|
/**
 * Registry of component collectors, keyed by component name.
 * Key order is kept exactly as declared; presumably `collect` walks the keys in this order — verify.
 */
const sources = {
    // Browser / platform identification
    android: isAndroid,
    browserKind: getBrowserKind,
    browserEngineKind: getBrowserEngineKind,
    documentFocus: getDocumentFocus,
    // Navigator and window readings
    userAgent: getUserAgent,
    appVersion: getAppVersion,
    rtt: getRTT,
    windowSize: getWindowSize,
    pluginsLength: getPluginsLength,
    pluginsArray: getPluginsArray,
    errorTrace: getErrorTrace,
    productSub: getProductSub,
    windowExternal: getWindowExternal,
    mimeTypesConsistent: areMimeTypesConsistent,
    evalLength: getEvalLength,
    webGL: getWebGL,
    webDriver: getWebDriver,
    languages: getLanguages,
    notificationPermissions: getNotificationPermissions,
    documentElementKeys: getDocumentElementKeys,
    functionBind: getFunctionBind,
    process: getProcess,
    // Automation-specific global properties
    distinctiveProps: checkDistinctiveProperties,
};
|
||||||
|
|
||||||
|
/**
 * Class representing a bot detector.
 * Holds the most recently collected components and the most recent per-detector results.
 *
 * @class
 * @implements {BotDetectorInterface}
 */
class BotDetector {
    constructor() {
        // Both stay `undefined` until `collect()` / `detect()` have run.
        this.components = undefined;
        this.detections = undefined;
    }

    /**
     * @returns {*} Components stored by the last `collect()` call, or `undefined`.
     */
    getComponents() {
        const { components } = this;
        return components;
    }

    /**
     * @returns {*} Detections stored by the last `detect()` call, or `undefined`.
     */
    getDetections() {
        const { detections } = this;
        return detections;
    }

    /**
     * @inheritdoc
     * @throws {Error} When called before `collect()` has stored any components.
     */
    detect() {
        const collected = this.components;
        if (collected === undefined) {
            throw new Error("BotDetector.detect can't be called before BotDetector.collect");
        }
        // The module-level `detect` returns a [per-detector results, final verdict] pair.
        const [perDetector, verdict] = detect(collected, detectors);
        this.detections = perDetector;
        return verdict;
    }

    /**
     * @inheritdoc
     */
    async collect() {
        const gathered = await collect(sources);
        this.components = gathered;
        return gathered;
    }
}
|
||||||
|
|
||||||
|
/**
 * Sends an unpersonalized AJAX request to collect installation statistics.
 * The request is skipped when `window.__fpjs_d_m` is truthy, and otherwise sent only when
 * `Math.random()` is below 0.001. Errors while sending are logged, never rethrown.
 */
function monitor() {
    // The FingerprintJS CDN (https://github.com/fingerprintjs/cdn) replaces `window.__fpjs_d_m` with `true`
    if (window.__fpjs_d_m) {
        return;
    }
    if (Math.random() >= 0.001) {
        return;
    }
    try {
        const xhr = new XMLHttpRequest();
        const endpoint = `https://m1.openfpcdn.io/botd/v${version}/npm-monitoring`;
        xhr.open('get', endpoint, true);
        xhr.send();
    }
    catch (error) {
        // console.error is ok here because it's an unexpected error handler
        // eslint-disable-next-line no-console
        console.error(error);
    }
}
|
||||||
|
/**
 * Creates a `BotDetector`, runs its component collection and returns it.
 *
 * @param {object} [options]
 * @param {boolean} [options.monitoring=true] - When truthy, `monitor()` is called before collecting.
 * @returns {Promise<BotDetector>} The detector, after `collect()` has resolved.
 */
async function load({ monitoring = true } = {}) {
    if (monitoring) {
        monitor();
    }
    const instance = new BotDetector();
    await instance.collect();
    return instance;
}
|
||||||
|
// Object wrapping `load`; exported below as the module's default export.
var index = { load };
|
||||||
|
|
||||||
|
// Public module surface: the named bindings plus `index` (the `{ load }` object) as the default export.
export { BotKind, BotdError, collect, index as default, detect, detectors, load, sources };
|
||||||
27
tests/vendor/fingerprintjs-5.2.0.umd.min.js
vendored
Normal file
27
tests/vendor/fingerprintjs-5.2.0.umd.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue