diff --git a/.github/ISSUE_TEMPLATE/01-launch-failure.yml b/.github/ISSUE_TEMPLATE/01-launch-failure.yml new file mode 100644 index 0000000..2c5451f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/01-launch-failure.yml @@ -0,0 +1,98 @@ +name: Launch failure +description: Browser or wrapper fails to start (install errors, missing deps, profile load fails, never reaches new_page) +title: "[launch] " +labels: ["bug", "launch-failure"] +body: + - type: markdown + attributes: + value: | + Use this when the browser never reaches a usable state. + If it starts and the bug appears on a site or clicking something, use the site/action template instead. + + - type: input + id: version + attributes: + label: Version + description: Output of `python -m invisible_playwright version`. + placeholder: 0.1.7 (binary firefox-7) + validations: + required: true + + - type: dropdown + id: os + attributes: + label: OS + options: + - Windows 10/11 x86_64 + - Linux x86_64 + - macOS (unsupported) + - Other + validations: + required: true + + - type: input + id: python + attributes: + label: Python + placeholder: 3.11.7 + validations: + required: true + + - type: input + id: install_cmd + attributes: + label: How you installed + placeholder: pip install invisible_playwright + validations: + required: true + + - type: textarea + id: snippet + attributes: + label: What you ran + description: Stop at the line that errors out. Redact creds. + render: python + value: | + from invisible_playwright import InvisiblePlaywright + with InvisiblePlaywright(seed=42) as browser: + ctx = browser.new_context() + validations: + required: true + + - type: textarea + id: traceback + attributes: + label: Full traceback + description: The whole stack trace verbatim. Don't summarize. + render: text + validations: + required: true + + - type: textarea + id: logs + attributes: + label: Extra logs + description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1`. Optional but speeds things up. + render: text + validations: + required: false + + - type: textarea + id: tried + attributes: + label: What you already tried + description: Reinstall, clear cache, different Python version, different proxy, etc. + validations: + required: false + + - type: checkboxes + id: confirm + attributes: + label: Before submitting + options: + - label: Searched existing issues. + required: true + - label: On the latest released version. + required: true + - label: Removed credentials and personal paths from the snippet and logs. + required: true diff --git a/.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml b/.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml new file mode 100644 index 0000000..6c38de6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml @@ -0,0 +1,167 @@ +name: Site or action bug +description: Browser starts fine but a navigation, click, evaluate, or other operation fails or behaves wrong +title: "[bug] " +labels: ["bug"] +body: + - type: markdown + attributes: + value: | + For bugs that happen after the browser is up. + If the browser never launches, use the launch failure template. + If a fingerprint detector flags the browser, use the stealth detection template. + + - type: input + id: version + attributes: + label: Version + description: Output of `python -m invisible_playwright version`. + placeholder: 0.1.7 (binary firefox-7) + validations: + required: true + + - type: dropdown + id: os + attributes: + label: OS + options: + - Windows 10/11 x86_64 + - Linux x86_64 + - macOS (unsupported) + - Other + validations: + required: true + + - type: input + id: python + attributes: + label: Python + placeholder: 3.11.7 + validations: + required: true + + - type: dropdown + id: headless + attributes: + label: headless= + description: Some bugs only repro on Windows headless=True (hidden alt-desktop path). + options: + - "True" + - "False" + validations: + required: true + + - type: dropdown + id: proxy + attributes: + label: Proxy + description: Sites often vary by IP geo (e.g. GDPR consent shows only on UK/EU). + options: + - No proxy (host network) + - Residential, UK/GB + - Residential, US + - Residential, other country (specify in notes) + - Datacenter (specify provider in notes) + validations: + required: true + + - type: dropdown + id: profile + attributes: + label: Profile dir + options: + - Fresh each run (no profile_dir) + - Persistent profile_dir, reusing across runs + - Persistent profile_dir, first run creating it + validations: + required: true + + - type: input + id: url + attributes: + label: URL + description: The exact URL passed to `page.goto`. Not "the homepage" — the literal string. + placeholder: https://id.sky.com/ + validations: + required: true + + - type: textarea + id: snippet + attributes: + label: Runnable reproduction + description: A complete snippet we can copy, paste, run. Stub creds with placeholders, keep everything else literal. + render: python + value: | + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, headless=True) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto("https://example.com/") + # the exact operation that fails: + page.click("button:has-text('Accept all')") + validations: + required: true + + - type: input + id: selector + attributes: + label: Selector or locator + description: The exact string passed to locator/click/frame_locator. Write N/A if not a selector bug. + placeholder: page.frame_locator("iframe[id^='sp_message_iframe_']").get_by_text("Accept all") + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected + description: What should happen when the snippet runs? + validations: + required: true + + - type: textarea + id: actual + attributes: + label: Actual + description: What happens instead? Full traceback, error string verbatim, any page.on('crash') firing. + validations: + required: true + + - type: textarea + id: screenshot + attributes: + label: Screenshot + description: Drag-drop a screenshot if the bug is visual. Optional but useful. + validations: + required: false + + - type: textarea + id: logs + attributes: + label: Browser logs + description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1 | tail -200`. Redact creds and real IPs. + render: text + validations: + required: false + + - type: textarea + id: notes + attributes: + label: Notes + description: Anything else, hypotheses, related issues, things you've already tried. + validations: + required: false + + - type: checkboxes + id: confirm + attributes: + label: Before submitting + options: + - label: Searched existing issues. + required: true + - label: On the latest released version. + required: true + - label: The snippet above runs end-to-end on a clean Python install. + required: true + - label: Removed credentials, proxy passwords, real IPs, personal file paths. + required: true diff --git a/.github/ISSUE_TEMPLATE/03-stealth-detection.yml b/.github/ISSUE_TEMPLATE/03-stealth-detection.yml new file mode 100644 index 0000000..b2c5e1d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/03-stealth-detection.yml @@ -0,0 +1,141 @@ +name: Stealth detection +description: A fingerprint detector flagged the browser as a bot, VM, VPN, anti-detect, tampered, or otherwise non-human +title: "[detect] " +labels: ["bug", "stealth"] +body: + - type: markdown + attributes: + value: | + Use this when something detects the browser (Fingerprint Pro, CreepJS, BotD, reCAPTCHA, Cloudflare, sannysoft, etc). + Bugs in operations (clicks, navigation) go to the site/action template. + Browser failing to start goes to the launch failure template. + + - type: input + id: version + attributes: + label: Version + placeholder: 0.1.7 (binary firefox-7) + validations: + required: true + + - type: dropdown + id: os + attributes: + label: OS + options: + - Windows 10/11 x86_64 + - Linux x86_64 + - macOS (unsupported) + - Other + validations: + required: true + + - type: dropdown + id: headless + attributes: + label: headless= + options: + - "True" + - "False" + validations: + required: true + + - type: dropdown + id: proxy + attributes: + label: Proxy + description: Datacenter or wrong-country proxies trip most detectors regardless of the browser. Be honest about what you used. + options: + - No proxy (host network) + - Residential, matching target geo + - Residential, different geo than target + - Datacenter (specify provider in notes) + - Mobile / 4G + validations: + required: true + + - type: input + id: detector + attributes: + label: Detector name and URL + description: Exact site / service / product that flagged us. + placeholder: Fingerprint Pro — https://demo.fingerprint.com/playground + validations: + required: true + + - type: textarea + id: scores + attributes: + label: Detector verdict + description: Paste the relevant flags / scores verbatim. For Fingerprint Pro paste `bot`, `vpn`, `virtual_machine`, `tampering*`, `vm_ml_score`, `suspect_score`. For CreepJS the headless / lies / trust scores. For reCAPTCHA v3 the score number. + render: text + placeholder: | + bot: bad + vpn: true + virtual_machine: true + vm_ml_score: 0.74 + suspect_score: 22 + validations: + required: true + + - type: textarea + id: screenshot + attributes: + label: Screenshot of the detector result + description: Drag-drop a screenshot of the detector page so we see what you see. + validations: + required: true + + - type: textarea + id: snippet + attributes: + label: How you launched + description: The InvisiblePlaywright launch + navigation that produced the result above. Redact creds. + render: python + value: | + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, headless=True) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto("https://demo.fingerprint.com/playground") + validations: + required: true + + - type: textarea + id: expected + attributes: + label: What you expected + description: Most detectors will never give a perfect score for any browser. Tell us what threshold you'd accept (e.g. bot=not_detected, vm_ml_score < 0.3). + validations: + required: true + + - type: textarea + id: full_report + attributes: + label: Full detector response + description: For Fingerprint Pro paste the JSON from /api/event/v4/ if you have it. For CreepJS paste the full Smart Signals block. Optional but speeds things up a lot. + render: json + validations: + required: false + + - type: textarea + id: notes + attributes: + label: Notes + validations: + required: false + + - type: checkboxes + id: confirm + attributes: + label: Before submitting + options: + - label: Searched existing issues. + required: true + - label: On the latest released version. + required: true + - label: The detector verdict above is from a real run, not a hypothesis. + required: true + - label: Removed credentials, real IPs, FpJS visitor_id values, personal file paths from the snippet and full report. + required: true diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index 805d579..0000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Bug report -description: Report a bug in the invisible_playwright Python wrapper -title: "[bug] " -labels: ["bug"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to file a bug report. - - Before continuing, please: - - Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues?q=is%3Aissue) to avoid duplicates. - - If the bug is in the **patched Firefox itself** (canvas/WebGL/audio/font spoofing, a detector flagging the browser), open it at [feder-cr/firefox-stealth](https://github.com/feder-cr/firefox-stealth/issues) instead. - - **Do not** report security vulnerabilities here — follow [SECURITY.md](https://github.com/feder-cr/invisible_playwright/blob/main/SECURITY.md). - - type: input - id: version - attributes: - label: invisible_playwright version - description: Output of `invisible_playwright version` - placeholder: "0.1.0 (binary 150.0.1)" - validations: - required: true - - type: dropdown - id: os - attributes: - label: Operating system - options: - - Windows x86_64 - - Linux x86_64 - - Other (please specify in description) - validations: - required: true - - type: input - id: python - attributes: - label: Python version - placeholder: "3.11.7" - validations: - required: true - - type: textarea - id: repro - attributes: - label: Minimal reproduction - description: A small, self-contained code snippet that triggers the bug. Strip out anything unrelated. - render: python - validations: - required: true - - type: textarea - id: expected - attributes: - label: Expected behavior - validations: - required: true - - type: textarea - id: actual - attributes: - label: Actual behavior - description: Include the full error message and traceback if any. - validations: - required: true - - type: textarea - id: logs - attributes: - label: Logs / additional context - description: Browser console output, environment variables, proxy config (redact credentials), etc. - render: text - validations: - required: false - - type: checkboxes - id: confirm - attributes: - label: Confirmations - options: - - label: I have searched existing issues and this bug has not been reported. - required: true - - label: I am on the latest release. - required: true - - label: I have removed any credentials, proxy passwords, or sensitive data from logs. - required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 6d3dace..44f31be 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -3,9 +3,9 @@ contact_links: - name: Security vulnerability url: https://github.com/feder-cr/invisible_playwright/security/advisories/new about: Report a security issue privately. Do NOT open a public issue. - - name: Bug in the patched Firefox itself (canvas / WebGL / fonts / WebRTC / etc.) - url: https://github.com/feder-cr/firefox-stealth/issues - about: Spoofing/fingerprint bugs belong in the firefox-stealth repo. + - name: Bug in the patched Firefox source (C++, IDL, Juggler JS) + url: https://github.com/feder-cr/invisible_firefox/issues + about: Source-level patches in the Firefox fork go in the invisible_firefox repo. Detection results (FpJS, CreepJS, etc.) use the stealth detection template here. - name: Question or general discussion url: https://github.com/feder-cr/invisible_playwright/discussions - about: For usage questions, ideas, and chat. Bugs and features still go in issues. + about: Usage questions, ideas, chat. Bugs and features still go in issues. diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..0f750f8 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,52 @@ +# ───────────────────────────────────────────────────────────────────────────── +# e2e.yml — run the FULL browser-driving e2e suite (the 127 @pytest.mark.e2e) +# on GitHub, on every push/PR to main. +# +# Why this can run on CI when the drive-gate had to stay light: the drive-gate +# launched Firefox in true HEADLESS mode, which is content-process unstable on +# the hosted runners (eval-CSP / context-destroyed). The stealth wrapper instead +# launches Firefox HEADED on a real display; under `xvfb-run` (a virtual X +# server) that's exactly what we get on a headless CI box — stable, and the same +# thing webrtc-e2e.yml already relies on. +# +# Secret-free, so it's safe in public CI: the binary is the PUBLIC firefox-9 +# release (no token), and the webrtc e2e fake a local TCP-only SOCKS. The proxy +# realness gate (fppro / smartproxy) is NOT here — it needs secrets and stays a +# local pre-release gate. +# ───────────────────────────────────────────────────────────────────────────── +name: e2e + +on: + push: + branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +permissions: + contents: read + +jobs: + e2e: + name: e2e (linux, xvfb) + runs-on: ubuntu-24.04 + timeout-minutes: 40 + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: { fetch-depth: 1 } + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: { python-version: '3.11' } + - name: Install wrapper + test deps (+ pinned Playwright) + run: | + python -m pip install --upgrade pip + python -m pip install ".[dev]" + python -m pip install "playwright==$(cat scripts/playwright_pin.txt)" + - name: System deps (xvfb + Firefox runtime libs) + run: | + sudo apt-get update + sudo apt-get install -y xvfb + sudo "$(which python)" -m playwright install-deps firefox + - name: Fetch the published firefox binary + run: echo "FF=$(python -m invisible_playwright fetch | tail -1)" >> "$GITHUB_ENV" + - name: Run the full e2e suite under a virtual display + run: xvfb-run -a python scripts/run_e2e.py "$FF" diff --git a/.github/workflows/firefox-launch-matrix.yml b/.github/workflows/firefox-launch-matrix.yml new file mode 100644 index 0000000..4e7b053 --- /dev/null +++ b/.github/workflows/firefox-launch-matrix.yml @@ -0,0 +1,106 @@ +name: firefox-launch-matrix + +# Cross-Windows-edition smoke for the shipped firefox-N binary. +# Triggered by issue #22 (firefox-7 SxS mismatch on Win11 build 26200, +# reporter `jannusdorfer-create`). +# +# Runs the exact reporter snippet on every Windows runner GitHub offers, +# from a fresh checkout. If any matrix cell fails the same way, the bug +# is reproducible on at least one clean-ish environment and we ship a +# sidecar mozglue.manifest fix. If all cells pass, the bug is confined +# to the reporter's specific environment (Pro/Enterprise GPO, EDR, etc.). + +on: + workflow_dispatch: + push: + branches: [main] + paths: + - '.github/workflows/firefox-launch-matrix.yml' + +jobs: + smoke: + name: launch (${{ matrix.os }}, py${{ matrix.python }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [windows-2022, windows-2025, windows-latest] + python: ["3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: pip + + - name: Windows edition + build info + shell: pwsh + run: | + $os = Get-CimInstance Win32_OperatingSystem + Write-Host "Caption : $($os.Caption)" + Write-Host "BuildNumber: $($os.BuildNumber)" + Write-Host "OSArch : $($os.OSArchitecture)" + Write-Host "Edition : $((Get-CimInstance Win32_OperatingSystem).OperatingSystemSKU)" + Write-Host "---" + Write-Host "VC++ Redistributables installed:" + Get-ItemProperty 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\*' ` + -ErrorAction SilentlyContinue | + Where-Object { $_.DisplayName -like '*Visual C++*Redist*' } | + Select-Object DisplayName, DisplayVersion | + Format-Table -AutoSize + + - name: Install package from this commit + run: | + python -m pip install --upgrade pip + pip install . + + - name: Fetch firefox-7 binary + run: python -m invisible_playwright fetch + + - name: Verify firefox.exe can launch standalone (the snippet that fails for issue #22) + shell: pwsh + run: | + # The platformdirs path has the duplicated `invisible-playwright` segment + # on Windows (user_cache_dir convention). + $ffPath = "$env:LOCALAPPDATA\invisible-playwright\invisible-playwright\Cache\firefox-7\firefox.exe" + if (-not (Test-Path $ffPath)) { + Write-Error "firefox.exe NOT FOUND at $ffPath" + exit 1 + } + Write-Host "Launching: $ffPath --version" + # NOTE: firefox.exe --version on Windows prints the version but may + # return non-zero exit code (sub-process fork quirk). Check stdout. + $output = & $ffPath --version 2>&1 | Out-String + Write-Host "Output: $output" + if ($output -notmatch 'Mozilla Firefox \d') { + Write-Error "firefox.exe --version did not print a Mozilla Firefox version. Output was: $output" + exit 1 + } + Write-Host "OK: firefox.exe runs and prints version." + + - name: Run reporter's exact InvisiblePlaywright snippet + run: | + python -c " + import asyncio + from invisible_playwright.async_api import InvisiblePlaywright + async def main(): + async with InvisiblePlaywright(seed=9128) as browser: + page = await browser.new_page() + await page.goto('about:blank') + print('OK: page loaded, url =', page.url) + asyncio.run(main()) + " + + - name: Upload diagnostics on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: launch-failure-${{ matrix.os }}-py${{ matrix.python }} + path: | + ${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/firefox.exe + ${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/mozglue.dll + if-no-files-found: warn + retention-days: 7 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..902d374 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,402 @@ +# ───────────────────────────────────────────────────────────────────────────── +# release.yml — build all 5 patched-Firefox targets at $0 and publish them as +# DRAFT GitHub Release assets, named per the wrapper contract (constants.ARCHIVE_NAME). +# DRAFT on purpose: a human runs the realness gate and only THEN un-drafts + bumps +# BINARY_VERSION. Nothing auto-ships (issue #14 lesson). +# +# PACKAGING (issue #14: dangling symlinks broke 265 downloads — never again): +# Linux → cp -aL (dereference ALL symlinks into real files) + rm dev tools + +# strip + sanitize + tar at ROOT, then validate_release.py as a HARD +# in-pipeline gate (the exact battle-tested script from the source repo). +# Win → mach package; zip the CONTENTS of dist/firefox (clean tree, NOT +# dist/bin) so firefox.exe sits at the zip ROOT. +# macOS → mach package; ad-hoc codesign the .app; PRESERVE its internal relative +# symlinks (a .app legitimately has them — cp -aL would break it); verify +# every symlink is relative+internal; tar the bundle. --version self-gate. +# +# DRIVE GATE (the firefox-8 catcher): after build, every binary is DRIVEN by +# Playwright on its native runner (launch via juggler + real page + JS roundtrip, +# headless, no screenshot → GPU-free, zero proxy). A juggler-less binary renders +# a screenshot fine but is undrivable — only an actual drive catches that. The +# proxy realness gate (fppro/webrtc) stays LOCAL — it needs secrets. +# +# Trigger: push a tag `firefox-N`, or run manually. Hybrid runners, all free. +# ───────────────────────────────────────────────────────────────────────────── +name: release + +on: + push: + tags: ['firefox-*'] + workflow_dispatch: + inputs: + source_ref: + description: 'invisible_firefox ref to build' + default: 'stealth/150' + release_tag: + description: 'release tag to publish the draft under (e.g. firefox-9)' + required: true + +env: + SOURCE_REPO: feder-cr/invisible_firefox + SOURCE_REF: ${{ github.event.inputs.source_ref || 'stealth/150' }} + +jobs: + build: + name: build-${{ matrix.leg }} + runs-on: ${{ matrix.runner }} + timeout-minutes: 350 + strategy: + fail-fast: false + matrix: + include: + - leg: linux-x86_64 + runner: ubuntu-24.04 + family: linux + target: '' + rust_target: x86_64-unknown-linux-gnu + win_disables: 'no' + extra_pkgs: '' + asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz + - leg: linux-arm64 + runner: ubuntu-24.04-arm + family: linux + target: '' + rust_target: aarch64-unknown-linux-gnu + win_disables: 'no' + extra_pkgs: '' + asset: firefox-150.0.1-stealth-linux-arm64.tar.gz + - leg: win-x86_64 + runner: ubuntu-24.04 + family: win + target: x86_64-pc-windows-msvc + rust_target: x86_64-pc-windows-msvc + win_disables: 'yes' + extra_pkgs: 'msitools p7zip-full zip' + asset: firefox-150.0.1-stealth-win-x86_64.zip + - leg: macos-arm64 + runner: macos-15 + family: mac + target: aarch64-apple-darwin + rust_target: aarch64-apple-darwin + win_disables: 'no' + extra_pkgs: '' + asset: firefox-150.0.1-stealth-macos-arm64.tar.gz + - leg: macos-x86_64 + runner: macos-15-intel + family: mac + target: x86_64-apple-darwin + rust_target: x86_64-apple-darwin + win_disables: 'no' + extra_pkgs: '' + asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz + steps: + - name: Free disk + 16G swap (Linux runners) + if: matrix.family != 'mac' + run: | + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android \ + /usr/local/share/boost "${AGENT_TOOLSDIRECTORY:-/opt/hostedtoolcache}" 2>/dev/null || true + sudo fallocate -l 16G /swapfile && sudo chmod 600 /swapfile && sudo mkswap /swapfile && sudo swapon /swapfile || true + + - name: Checkout patched Firefox source + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + repository: ${{ env.SOURCE_REPO }} + ref: ${{ env.SOURCE_REF }} + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: { python-version: '3.11' } + + - name: Install Linux build tools + if: matrix.family != 'mac' + run: | + sudo apt-get update + sudo apt-get install -y util-linux binutils ${{ matrix.extra_pkgs }} + + - name: Select Xcode 26.2 + export SDK path (macOS) + if: matrix.family == 'mac' + run: | + sudo xcode-select -s /Applications/Xcode_26.2.app + SDKP="$(xcrun --show-sdk-path)" + echo "SDK_PATH=$SDKP" >> "$GITHUB_ENV" + echo "macOS SDK $(xcrun --sdk macosx --show-sdk-version) at $SDKP" + + - name: Add Rust target + run: rustup target add ${{ matrix.rust_target }} || true + + - name: Extend the repo .mozconfig (NO mold; +target/SDK as needed) + run: | + test -f .mozconfig || { echo "ERROR: no .mozconfig in source"; exit 1; } + rm -f mozconfig + { + echo "" + echo "# --- release CI levers for ${{ matrix.leg }} (mold intentionally OFF — it segfaults libxul) ---" + echo "ac_add_options --disable-debug-symbols" + } >> .mozconfig + if [ -n "${{ matrix.target }}" ]; then echo "ac_add_options --target=${{ matrix.target }}" >> .mozconfig; fi + if [ "${{ matrix.family }}" = "mac" ]; then echo "ac_add_options --with-macos-sdk=$SDK_PATH" >> .mozconfig; fi + if [ "${{ matrix.win_disables }}" = "yes" ]; then + { echo "ac_add_options --disable-default-browser-agent"; + echo "ac_add_options --disable-maintenance-service"; + echo "ac_add_options --disable-update-agent"; } >> .mozconfig + fi + if [ "${{ matrix.family }}" = "mac" ]; then NCPU=$(sysctl -n hw.ncpu); else NCPU=4; fi + { echo "mk_add_options MOZ_PARALLEL_BUILD=$NCPU"; + echo "mk_add_options MOZ_OBJDIR=@TOPSRCDIR@/obj-rel"; } >> .mozconfig + echo "----- final .mozconfig -----"; cat .mozconfig + + - name: Build + run: ./mach build + + # ── LINUX: dereference symlinks (issue #14) + strip + sanitize + tar@root + GATE + - name: Package + validate (Linux) + if: matrix.family == 'linux' + run: | + set -e + DIST=obj-rel/dist/bin + STAGING=staging + rm -rf "$STAGING"; mkdir -p "$STAGING" out + cp -aL "$DIST/." "$STAGING/" # -L: dereference ALL symlinks into real files + N=$(find "$STAGING" -type l | wc -l) + [ "$N" -eq 0 ] || { echo "ERROR: $N symlinks remain after cp -aL"; exit 1; } + for t in xpcshell certutil pk12util rapl; do rm -f "$STAGING/$t"; done + # JUGGLER GATE: the binary is undrivable by Playwright without it (see 70-known-bugs) + { [ -e "$STAGING/chrome/juggler.manifest" ] && [ -d "$STAGING/chrome/juggler" ]; } \ + || { echo "ERROR: juggler missing from package (chrome/juggler) — Playwright can't drive it"; exit 1; } + echo "juggler GATE OK (loose chrome/juggler present)" + find "$STAGING" -type f \ + \( -name '*.so' -o -name firefox -o -name firefox-bin -o -name plugin-container \ + -o -name pingsender -o -name glxtest -o -name vaapitest -o -name updater \) \ + -exec strip --strip-debug {} + 2>/dev/null || true + STAGING="$STAGING" python3 scripts/linux_sanitize.py || true # no-op in CI (no /home/feder), defensive + tar --owner=0 --group=0 --numeric-owner --mtime="2026-01-01 00:00:00 UTC" \ + -czf "out/${{ matrix.asset }}" -C "$STAGING" . # firefox at ROOT + echo "=== HARD GATE: scripts/validate_release.py (the issue-#14 protector) ===" + python3 scripts/validate_release.py --linux "out/${{ matrix.asset }}" --linux-only + ls -la out/ + + # ── WINDOWS (cross): zip the CLEAN dist/firefox tree, firefox.exe at root + - name: Package (Windows cross) + if: matrix.family == 'win' + run: | + set -e + # Do NOT swallow a mach failure: `./mach package || echo` lets set -e pass + # and would fall through to a stale tree. A release MUST come from the clean + # dist/firefox; dist/bin is the dev tree (cruft + loose juggler that masked + # the firefox-7/8 packaging bugs), never acceptable for a release. + ./mach package + [ -f obj-rel/dist/firefox/firefox.exe ] \ + || { echo "ERROR: mach package did not produce a clean dist/firefox tree"; exit 1; } + WIN_APP=obj-rel/dist/firefox + echo "packaging from: $WIN_APP" + # JUGGLER GATE: omni.ja must carry juggler (else Playwright can't drive it) + [ -f "$WIN_APP/omni.ja" ] || { echo "ERROR: no omni.ja in $WIN_APP"; exit 1; } + python3 -c "import zipfile,sys; sys.exit(0 if any('juggler' in n.lower() for n in zipfile.ZipFile('$WIN_APP/omni.ja').namelist()) else 1)" \ + || { echo "ERROR: juggler missing from $WIN_APP/omni.ja — Playwright can't drive it"; exit 1; } + echo "juggler GATE OK (win)" + mkdir -p out + ( cd "$WIN_APP" && zip -qr "$GITHUB_WORKSPACE/out/${{ matrix.asset }}" . ) # firefox.exe at zip ROOT + ls -la out/ + + # ── macOS: package .app, ad-hoc sign, verify relative-internal symlinks, --version gate, tar + - name: Package + validate (macOS) + if: matrix.family == 'mac' + run: | + set -e + ./mach package + APP="$(find obj-rel/dist -maxdepth 2 -name '*.app' -type d | head -1)" + [ -n "$APP" ] || { echo "ERROR: no .app produced"; exit 1; } + echo "built app: $APP" + # JUGGLER GATE: the .app's omni.ja must carry juggler (else Playwright can't drive it) + python3 -c "import zipfile,sys,glob; jas=glob.glob('$APP/Contents/Resources/omni.ja')+glob.glob('$APP/Contents/Resources/browser/omni.ja'); sys.exit(0 if jas and any(any('juggler' in n.lower() for n in zipfile.ZipFile(j).namelist()) for j in jas) else 1)" \ + || { echo "ERROR: juggler missing from .app omni.ja — Playwright can't drive it"; exit 1; } + echo "juggler GATE OK (mac)" + codesign --force --deep --sign - --timestamp=none "$APP" + codesign --verify --deep --strict --verbose=2 "$APP" + echo "=== --version GATE ===" + "$APP/Contents/MacOS/firefox" --version + echo "=== critical files present ===" + for need in "Contents/MacOS/firefox" "Contents/Info.plist"; do + [ -e "$APP/$need" ] || { echo "ERROR: missing $need"; exit 1; } + done + echo "=== Info.plist well-formed + required keys (a malformed plist → Finder 'damaged') ===" + plutil -lint "$APP/Contents/Info.plist" + for key in CFBundleExecutable CFBundleIdentifier CFBundleShortVersionString; do + plutil -extract "$key" raw -o - "$APP/Contents/Info.plist" >/dev/null \ + || { echo "ERROR: Info.plist missing $key"; exit 1; } + done + EXEC="$(plutil -extract CFBundleExecutable raw -o - "$APP/Contents/Info.plist")" + [ -e "$APP/Contents/MacOS/$EXEC" ] \ + || { echo "ERROR: CFBundleExecutable '$EXEC' has no matching binary in Contents/MacOS"; exit 1; } + echo "=== verify NO absolute symlinks in the .app (relative-internal ones are fine) ===" + BAD="$(find "$APP" -type l -print0 | xargs -0 -I{} sh -c 't=$(readlink "{}"); case "$t" in /*) echo "{} -> $t";; esac')" + [ -z "$BAD" ] || { echo "ERROR: absolute symlinks in .app (break on user machines):"; echo "$BAD" | head -5; exit 1; } + echo "mac .app OK: critical files present, no absolute symlinks" + STABLE="$(dirname "$APP")/Firefox.app" + [ "$APP" = "$STABLE" ] || mv "$APP" "$STABLE" + mkdir -p out + tar -czf "out/${{ matrix.asset }}" -C "$(dirname "$STABLE")" Firefox.app # preserves internal symlinks + ls -la out/ + + - name: Upload build artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: asset-${{ matrix.leg }} + path: out/${{ matrix.asset }} + if-no-files-found: error + retention-days: 7 + + # DRIVE GATE — the firefox-8 catcher. A raw `firefox --screenshot` proves + # nothing about automation: a juggler-less binary renders fine and ships + # broken (firefox-8 did exactly that). So we DRIVE every binary the way users + # will: Playwright launches it over the juggler pipe, loads a real page, and + # round-trips JS. A binary missing/broken juggler throws TargetClosedError + # here and the release never publishes. Headless, NO screenshot → GPU-free, + # so it can't false-fail on the GPU-less hosted runners. Zero proxy / zero + # secrets → safe in public CI (the proxy realness gate stays local, by design). + # Each leg runs on its NATIVE runner so we test the real artifact, not a cross + # surrogate. Playwright is pinned to a version validated against this build's + # juggler; bump it in lockstep when the juggler is re-synced from upstream. + gate: + name: gate-${{ matrix.leg }} + needs: build + runs-on: ${{ matrix.runner }} + timeout-minutes: 25 + strategy: + fail-fast: false + matrix: + include: + # `extra: --full` adds the mouse/keyboard/canvas/navsurface interaction + # checks. Only on linux-x86_64 (historically the most reliable hosted + # runner): the interaction code is platform-identical JS (omni.ja), so + # one reliable full run catches a firefox-2-class regression for all + # platforms. The other legs run SMOKE (launch+http+UA+webdriver) — the + # firefox-8/juggler catcher — which is robust even on the flaky + # windows-latest runner. See scripts/ci_drive_gate.py. + - leg: linux-x86_64 + runner: ubuntu-24.04 + kind: linux + asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz + extra: '--full' + - leg: linux-arm64 + runner: ubuntu-24.04-arm + kind: linux + asset: firefox-150.0.1-stealth-linux-arm64.tar.gz + extra: '' + - leg: win-x86_64 + runner: windows-latest + kind: win + asset: firefox-150.0.1-stealth-win-x86_64.zip + extra: '' + - leg: macos-arm64 + runner: macos-15 + kind: mac + asset: firefox-150.0.1-stealth-macos-arm64.tar.gz + extra: '' + - leg: macos-x86_64 + runner: macos-15-intel + kind: mac + asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz + extra: '' + steps: + - name: Checkout wrapper (for scripts/ci_drive_gate.py) + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: { fetch-depth: 1 } + - name: Download asset + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + name: asset-${{ matrix.leg }} + path: art + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: { python-version: '3.11' } + - name: Install Playwright driver (no bundled browser — we override executable_path) + # Pin from a SINGLE source (scripts/playwright_pin.txt) so release.yml and + # verify-assets.yml can't drift to different versions. The drive gate then + # ENFORCES playwright↔juggler compatibility: an incompatible pin fails the + # launch/drive (TargetClosedError / protocol error) and nothing publishes. + # Bump the pin file in lockstep when the juggler is re-synced from upstream. + shell: bash + run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)" + - name: Linux system deps for headless firefox + if: matrix.kind == 'linux' + run: sudo "$(which python)" -m playwright install-deps firefox + - name: Extract + locate firefox binary + shell: bash + run: | + set -e + mkdir -p ff + A="art/${{ matrix.asset }}" + case "${{ matrix.kind }}" in + win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";; + linux) tar xzf "$A" -C ff; EXE="ff/firefox";; + mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";; + esac + [ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; } + chmod +x "$EXE" 2>/dev/null || true + echo "FF_EXE=$EXE" >> "$GITHUB_ENV" + echo "located: $EXE" + - name: DRIVE GATE — Playwright launch via juggler + real page (+ interaction on --full) + shell: bash + run: python scripts/ci_drive_gate.py "$FF_EXE" ${{ matrix.extra }} + + publish: + name: publish-draft-release + needs: [build, gate] + runs-on: ubuntu-24.04 + permissions: + contents: write + steps: + - name: Download all build assets + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: { pattern: asset-*, path: dl, merge-multiple: true } + - name: Assert all 5 target archives present (no silent partial release) + run: | + cd dl + EXPECTED=" + firefox-150.0.1-stealth-linux-x86_64.tar.gz + firefox-150.0.1-stealth-linux-arm64.tar.gz + firefox-150.0.1-stealth-win-x86_64.zip + firefox-150.0.1-stealth-macos-arm64.tar.gz + firefox-150.0.1-stealth-macos-x86_64.tar.gz + " + for a in $EXPECTED; do + [ -s "$a" ] || { echo "ERROR: missing/empty release asset: $a (a build leg silently dropped out?)"; exit 1; } + done + echo "all 5 target archives present" + - name: Generate checksums.txt + run: | + cd dl; ls -la + # explicit glob — never include checksums.txt itself (the `*`-includes-itself trap) + sha256sum firefox-150.0.1-stealth-* > checksums.txt + echo "----- checksums.txt -----"; cat checksums.txt + - name: Resolve release tag + id: tag + run: | + TAG="${{ github.event.inputs.release_tag }}" + [ -z "$TAG" ] && TAG="${GITHUB_REF_NAME}" + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + echo "publishing DRAFT release for tag: $TAG" + - name: Create DRAFT release with all assets + uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2 + with: + tag_name: ${{ steps.tag.outputs.tag }} + name: invisible_firefox (150.0.1) rev ${{ steps.tag.outputs.tag }} + draft: true + prerelease: false + fail_on_unmatched_files: true + files: | + dl/*.tar.gz + dl/*.zip + dl/checksums.txt + body: | + Patched Firefox 150.0.1 — built on GitHub Actions ($0, no mold). + Targets: linux-x86_64, linux-arm64, win-x86_64, macos-arm64, macos-x86_64. + + DRAFT — do not publish until validate_release.py + realness gate pass on all archives. + + macOS: ad-hoc signed (not notarized). After download run: + xattr -dr com.apple.quarantine Firefox.app + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/verify-assets.yml b/.github/workflows/verify-assets.yml new file mode 100644 index 0000000..b4d3567 --- /dev/null +++ b/.github/workflows/verify-assets.yml @@ -0,0 +1,111 @@ +# ───────────────────────────────────────────────────────────────────────────── +# verify-assets.yml — re-runnable DRIVE GATE for an EXISTING release's assets. +# +# release.yml drive-gates every binary it builds. This does the same drive test +# WITHOUT rebuilding: it downloads a release's already-published assets (works on +# DRAFT releases too via GITHUB_TOKEN) and drives each one on its native runner. +# +# Use it to: +# • drive-test a release that was built before the in-pipeline gate existed +# (e.g. firefox-9, built on the old release.yml), or +# • re-verify any shipped release on demand (regression check). +# +# Same single-source-of-truth drive logic as release.yml: scripts/ci_drive_gate.py. +# Headless, no screenshot → GPU-free. Zero proxy / zero secrets. +# ───────────────────────────────────────────────────────────────────────────── +name: verify-assets + +on: + workflow_dispatch: + inputs: + release_tag: + description: 'release tag whose assets to drive-test (e.g. firefox-9)' + required: true + +permissions: + # write (not read) is required: GitHub only exposes DRAFT releases to tokens + # with push access. With contents:read, `gh release download` on a draft tag + # 404s ("release not found"). This workflow only READS assets — the elevated + # scope is solely to make draft releases visible to GITHUB_TOKEN. + contents: write + +jobs: + drive: + name: drive-${{ matrix.leg }} + runs-on: ${{ matrix.runner }} + timeout-minutes: 25 + strategy: + fail-fast: false + matrix: + include: + # --full (interaction) only on the reliable linux-x86_64 leg; others run + # the robust SMOKE drive. Same rationale as release.yml's gate. + - leg: linux-x86_64 + runner: ubuntu-24.04 + kind: linux + asset: firefox-150.0.1-stealth-linux-x86_64.tar.gz + extra: '--full' + - leg: linux-arm64 + runner: ubuntu-24.04-arm + kind: linux + asset: firefox-150.0.1-stealth-linux-arm64.tar.gz + extra: '' + - leg: win-x86_64 + runner: windows-latest + kind: win + asset: firefox-150.0.1-stealth-win-x86_64.zip + extra: '' + - leg: macos-arm64 + runner: macos-15 + kind: mac + asset: firefox-150.0.1-stealth-macos-arm64.tar.gz + extra: '' + - leg: macos-x86_64 + runner: macos-15-intel + kind: mac + asset: firefox-150.0.1-stealth-macos-x86_64.tar.gz + extra: '' + steps: + - name: Checkout wrapper (for scripts/ci_drive_gate.py) + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: { fetch-depth: 1 } + - name: Download the release asset (draft releases included) + shell: bash + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -e + mkdir -p art + gh release download "${{ github.event.inputs.release_tag }}" \ + --repo "${{ github.repository }}" \ + --pattern "${{ matrix.asset }}" \ + --dir art + ls -la art/ + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: { python-version: '3.11' } + - name: Install Playwright driver (no bundled browser — we override executable_path) + # Single-source pin (see release.yml); the drive gate enforces juggler compat. + shell: bash + run: python -m pip install --quiet "playwright==$(cat scripts/playwright_pin.txt)" + - name: Linux system deps for headless firefox + if: matrix.kind == 'linux' + run: sudo "$(which python)" -m playwright install-deps firefox + - name: Extract + locate firefox binary + shell: bash + run: | + set -e + mkdir -p ff + A="art/${{ matrix.asset }}" + case "${{ matrix.kind }}" in + win) python -c "import zipfile; zipfile.ZipFile('$A').extractall('ff')"; EXE="ff/firefox.exe";; + linux) tar xzf "$A" -C ff; EXE="ff/firefox";; + mac) tar xzf "$A" -C ff; EXE="ff/Firefox.app/Contents/MacOS/firefox";; + esac + [ -e "$EXE" ] || { echo "ERROR: firefox binary not found at $EXE"; exit 1; } + chmod +x "$EXE" 2>/dev/null || true + echo "FF_EXE=$EXE" >> "$GITHUB_ENV" + echo "located: $EXE" + - name: DRIVE GATE — Playwright launch via juggler + real page (+ interaction on --full) + shell: bash + run: python scripts/ci_drive_gate.py "$FF_EXE" ${{ matrix.extra }} diff --git a/.github/workflows/webrtc-e2e.yml b/.github/workflows/webrtc-e2e.yml new file mode 100644 index 0000000..d14b8ce --- /dev/null +++ b/.github/workflows/webrtc-e2e.yml @@ -0,0 +1,47 @@ +name: webrtc-e2e + +# Live WebRTC realness check against the shipped patched binary. +# +# Manual (workflow_dispatch) on purpose: it needs a firefox-N binary that +# carries the WebRTC fixes (synthetic srflx in genuine nICEr form + the +# default-route fallback behind a proxy). Run it after publishing such a +# binary — it is the release gate for "WebRTC looks real behind a proxy". +# Until that binary ships, test_not_blocked_behind_tcp_only_socks is EXPECTED +# to fail (the old binary is fully blocked behind a SOCKS proxy), which is the +# whole point of the gate. +# +# No smartproxy / credentials: the "behind a proxy" condition is faked by an +# in-process TCP-only SOCKS5 server (refuses UDP ASSOCIATE) and the egress IP +# is injected as an RFC 5737 TEST-NET address. Fully self-contained. + +on: + workflow_dispatch: + +jobs: + webrtc-e2e: + name: webrtc realness (ubuntu, py3.12) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + + - name: Install package + dev extras + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Fetch the patched Firefox binary + run: python -m invisible_playwright fetch + + - name: Resolve binary path + run: echo "STEALTHFOX_E2E_BINARY=$(python -m invisible_playwright path)" >> "$GITHUB_ENV" + + - name: Run WebRTC realness e2e (xvfb for the headless Firefox) + run: | + sudo apt-get update && sudo apt-get install -y xvfb + xvfb-run -a pytest tests/test_webrtc_realness.py -m e2e -o addopts="" -v -rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e9d5aec..f142d90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,57 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +### Added +- `timezone="auto"`: the browser timezone is auto-derived from the egress IP. By default (no explicit timezone) it ALWAYS resolves — from the proxy egress when a proxy is set, otherwise from the host's own public IP — so the zone can never disagree with the IP (the classic `timezone_mismatch` signal). An explicit `"Area/City"` is the only way to force a specific zone. On failure: with a proxy the launch raises (no silent host-TZ fallback behind a foreign proxy); without a proxy it falls back to the host TZ so a transient lookup can't break the launch. +- The egress IP is mapped to its IANA zone with an offline mmdb (`daijro/geoip-all-in-one`). It auto-updates against the upstream weekly rebuild: cached locally, re-checked after `GEOIP_REFRESH_DAYS` (7), older copies pruned, and a stale cache is reused when offline. `STEALTHFOX_GEOIP_MMDB` points at your own `.mmdb` to skip the download. +- `resolve_session_timezone(timezone, proxy)` and `ensure_geoip_mmdb()` re-exported at the package root (plus `GeoTimezoneError`) so integrations that own their launch can reproduce the resolution. +- `tests/test_geo.py` (37) + `tests/test_geoip_update.py` (freshness / auto-update / offline fallback) unit tests. + +### Changed +- New runtime dependencies: `requests[socks]` (SOCKS egress lookup), `maxminddb` (mmdb reader), `tzdata` (IANA database for `zoneinfo`, which Windows lacks). + +## [0.2.0] - 2026-05-28 + +### Added +- Public config helpers in `invisible_playwright.config`: `get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display)` returns a complete `firefox_user_prefs` dict; `get_default_args()` returns the baseline CLI args list (currently empty). Both also re-exported at the package root. +- `invisible_playwright.ensure_binary` re-exported at the package root for parity with the `cloakbrowser.download.ensure_binary` integration pattern that downstream projects (Skyvern, Crawlee, agno) already expect. +- These helpers let third-party fetchers (changedetection.io plugins, Crawlee `BrowserPool` subclasses, agno toolkits) drive `playwright.firefox.launch(executable_path=..., firefox_user_prefs=...)` themselves without depending on the `InvisiblePlaywright` context manager owning the lifecycle. +- `tests/unit/test_config_public.py`: 14 unit tests covering deterministic seed, locale / timezone / pin / extra_prefs / humanize variations, and round-trip via the public namespace. + +### Unchanged +- `InvisiblePlaywright` context manager surface is identical (backwards compatible). +- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build. + +## [0.1.8] - 2026-05-23 + +### Fixed +- [#20](https://github.com/feder-cr/invisible_playwright/issues/20): cross-origin iframes were unreachable from Playwright. `element_handle.content_frame()` returned `None`, `frame.evaluate()` threw cross-origin SOP errors, and `frame_locator(...).click()` timed out even with `force=True`. Root cause: FF150 defaults `fission.webContentIsolationStrategy=1` (`IsolateEverything`), which site-isolates every cross-origin iframe into a separate `webIsolated` content process even when `fission.autostart=False`. The parent's Juggler FrameTree then has a Frame placeholder with no docShell and no URL — every protocol op that needs to enter the iframe fails. Fix: pin `fission.webContentIsolationStrategy=0` (`IsolateNothing`) in the baseline prefs. The setting can be flipped back per session via `extra_prefs={"fission.webContentIsolationStrategy": 1}`. + +### Added +- `tests/test_cross_origin_iframe.py`: 4 unit + 5 e2e regression sentinels for cross-origin iframe interaction. The e2e layer runs entirely offline against two local HTTP servers on `127.0.0.1` (two ports = two SOP origins) and covers `page.frames` URL tracking, `content_frame()`, `frame.evaluate()`, `frame_locator(...).locator(...)`, and end-to-end `dispatch_event("click")` for plain, sandboxed and titled iframes. A future FF upgrade or fingerprint A/B that flips the pref back to `1` will fail the suite before shipping. + +### Unchanged +- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build was needed. + +## [0.1.7] - 2026-05-21 + +### Fixed +- [#18](https://github.com/feder-cr/invisible_playwright/issues/18): Tab crash when running with `headless=True` on Windows on pages that trigger cross-process navigation. Two separate bugs that only manifested together: (1) the Chromium content sandbox at default level 6 puts content processes on `kAlternateWinstation`, but the wrapper hides the browser window on its own alt-desktop (`CreateDesktop` for headless on Windows). Mismatched desktops → cross-process navigations couldn't reparent windows → content process exits cleanly and Playwright fires `page.on('crash')`. (2) The canvas2d `getImageData` stealth spoof wrote to a read-only mapped `DataSourceSurface`. On GPU-backed canvases that memory is write-protected → segfault during the final `getImageData` at page unload. Wrapper now sets `security.sandbox.content.level=4` in the alt-desktop workaround set, and `firefox-7` ships the source fix that moves the noise to the JS array's writable backing buffer. + +### Changed +- `BINARY_VERSION` bumped from `firefox-5` to `firefox-7`. `firefox-6` was rolled back when its partial fix turned out to be wrong (the iframe-burst hypothesis was a dead end; bisection in the evening found the real two-bug cause documented above). + +## [0.1.6] - 2026-05-21 + +### Added +- `profile_dir=` kwarg on `InvisiblePlaywright` (sync + async). When set, the session uses `firefox.launch_persistent_context()` so cookies, localStorage, sessionStorage, extensions, cache and prefs are kept on disk between runs. `__enter__` returns a `BrowserContext` directly: `with InvisiblePlaywright(profile_dir=p) as ctx: ctx.new_page()`. Pair with a stable `seed=` to also pin the fingerprint identity across runs. First run creates the dir; subsequent runs reuse it. + +### Fixed +- `launch_persistent_context(timezone_id="…")` no longer times out at 180s. Root cause: `juggler/content/main.js` calls `docShell.overrideTimezone(...)` on every navigation; the patched Firefox up to firefox-4 didn't expose that IDL method on `nsIDocShell`, so the call threw `TypeError: docShell.overrideTimezone is not a function`. On the non-persistent path the error fired *after* launch and was harmless; on the persistent path it blocked the launch handshake. `firefox-5` ships the C++ method (see `patch.md` section 19); this release removes the firefox-4 era Python workaround that was filtering `locale`/`timezone_id` out of the persistent context kwargs. + +### Changed +- `BINARY_VERSION` bumped from `firefox-4` to `firefox-5`. The Python source delta is JS/Python only; the new Firefox build adds 50 lines of C++ in `docshell/base/nsIDocShell.idl` + `nsDocShell.cpp`. + ## [0.1.5] - 2026-05-20 ### Fixed @@ -22,7 +73,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [0.1.3] - 2026-05-19 ### Changed -- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible-firefox#stealth/150](https://github.com/feder-cr/invisible-firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`). +- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible_firefox#stealth/150](https://github.com/feder-cr/invisible_firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`). - The patched Firefox archive now ships the **proper C++ implementation** of `windowUtils.jugglerSendMouseEvent`, replacing the JS shim from 0.1.2. ### C++ fixes landed in this release @@ -33,7 +84,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - **C7 (partial)**: storage stub for `nsIDocShell.languageOverride`. Workaround `InvisiblePlaywright(locale="")` recommended until full BC FIELD port lands. ### Verified -- Both archives built from same source: feder-cr/invisible-firefox commit `68906f1f9c55`. +- Both archives built from same source: feder-cr/invisible_firefox commit `68906f1f9c55`. - Windows + Linux smoke suite green: launch, `ctx.new_page()`, `page.mouse.{move,down,up,click,wheel}`, `navigator.webdriver=false`, sannysoft 32/33 PASS. - SHA256 published in `checksums.txt` on the `firefox-3` release. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b56e5d3..8eb110d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,7 +7,7 @@ Thanks for your interest in improving this project. Contributions are welcome vi - **Bug?** Open a [bug report](https://github.com/feder-cr/invisible_playwright/issues/new?template=bug_report.yml). - **Idea?** Open a [feature request](https://github.com/feder-cr/invisible_playwright/issues/new?template=feature_request.yml). - **Security issue?** Do **not** open a public issue — see [SECURITY.md](SECURITY.md). -- **The C++ patches** live in the companion repo [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there. +- **The C++ patches** live in the companion repo [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there. ## Scope @@ -18,7 +18,7 @@ This repository ships the **Python wrapper** (`invisible_playwright`) around a p - Binary download/caching, CLI, proxy plumbing - Tests, docs, examples, packaging -Out of scope (belongs in `invisible-firefox`): +Out of scope (belongs in `invisible_firefox`): - Changes to the Firefox C++ source - New preferences exposed by the patched binary @@ -65,7 +65,7 @@ Before opening, please: - Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues) — the bug may already be tracked. - Reproduce on the **latest release** if possible. -- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible-firefox` instead. +- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible_firefox` instead. Include: diff --git a/README.md b/README.md index 205617c..a1e5f6b 100644 --- a/README.md +++ b/README.md @@ -6,56 +6,26 @@ [![Firefox 150.0.1](https://img.shields.io/badge/firefox-150.0.1-orange.svg)](https://www.mozilla.org/firefox/) [![GitHub release](https://img.shields.io/github/v/release/feder-cr/invisible_playwright.svg)](https://github.com/feder-cr/invisible_playwright/releases) [![GitHub stars](https://img.shields.io/github/stars/feder-cr/invisible_playwright.svg?style=social)](https://github.com/feder-cr/invisible_playwright/stargazers) +[![browser launches](https://img.shields.io/github/downloads/feder-cr/invisible_firefox/usage-counter/total?label=browser%20launches&color=blue)](https://github.com/feder-cr/invisible_firefox/releases/tag/usage-counter) [![LinkedIn](https://img.shields.io/badge/LinkedIn-Federico%20Elia-0A66C2?logo=linkedin&logoColor=white)](https://it.linkedin.com/in/federico-elia-5199951b6) -A patched Firefox **100% Playwright-compatible** that passes the hardest browser-fingerprint detectors in the wild. +**Stealth Firefox that passes every bot detection test. Drop-in Playwright replacement, fingerprint patched at the C++ level, not a JavaScript shim.** +![invisible_playwright - 5/5 detection suites passed](docs/screenshots/hero.gif) -## Results - -### Google reCAPTCHA v3 - **0.90 / 1.0** - -Top-tier score. Google classifies the session as "very likely a human". Most anti-detect stacks plateau around 0.3-0.7. - -![reCAPTCHA score 0.90](docs/screenshots/recaptcha_score.png) - -### Fingerprint Pro - **bot: not detected, VPN: false, tampering: false, dev tools: not detected** - -FingerprintJS Pro's full Smart Signals battery flips every flag to "Not detected". Browser correctly identified as Firefox 150 on Windows 10. Confidence score 0.9. - -![FingerprintPro not detected](docs/screenshots/fingerprintpro.png) - -### CreepJS - **0 lies**, fingerprint is internally coherent - -No contradictions between headless hints, spoofed values, and real rendering output. That "0 lies" is what kills most anti-detect browsers: one inconsistency (e.g. Chrome UA + Firefox WebGL) and the trust score collapses. - -![CreepJS 0 lies](docs/screenshots/creepjs.png) - -### BrowserLeaks WebRTC - **no public IP leak** - -WebRTC srflx address is the proxy egress IP; host candidates are private LAN. The real public IP never leaks via STUN, even on pages that configure their own ICE servers. Stock Firefox exposes an mDNS hostname (e.g. `abc-1234.local`) as a host ICE candidate, which is itself a stable per-session signal detectors fingerprint. invisible_playwright replaces host candidates with synthetic private-LAN IPs that match the spoofed network, removing the mDNS tell. - -![WebRTC no leaks](docs/screenshots/webrtc.png) - -### bot.sannysoft.com - **all checks pass** - -Every row green: WebDriver not present, Chrome-only properties absent, plugin/mime/languages arrays coherent, permissions API correct, iframe/source window checks pass. - -![Sannysoft all green](docs/screenshots/sannysoft.png) - ---- ## Why it's powerful -**Most anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems: + +**Most other anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems: 1. **JS patches are detectable.** Anti-bots enumerate native function `.toString()`, check descriptor configurability, compare property enumeration order, watch for prototype mutations. Every patch leaves a fingerprint of its own. CreepJS has an entire battery of "lies detectors" built around this. 2. **Chromium itself is now suspect.** Residential-proxy bot traffic is overwhelmingly Chromium-based, so detectors weight anything Chromium-shaped as risky by default. Chromium-based forks inherit Chrome's open-source layers (BoringSSL, Blink, V8, ANGLE) cleanly, but they still cannot fully match Chrome in practice: Chrome ships closed-source components on top (Widevine, proprietary codecs, Google Update / Safe Browsing endpoints) that flip detectable JS feature flags and network signals, and forks lag Chrome's release cadence by days to weeks, leaving telltale version-specific behaviours that detectors lock onto. **invisible_playwright patches Firefox at the C++ level.** The spoofed values come back out through the normal Gecko paths - there is no JS shim, no override, no `Object.defineProperty`. **From the page's point of view, the browser is just telling the truth.** Anti-bot lie-detectors have nothing to latch onto. -invisible_playwright spoofs **all the layers that matter, together, coherently** — Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) for the full per-layer breakdown of which C++ files are patched and why. +invisible_playwright spoofs **all the layers that matter, together, coherently**: Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) for the full per-layer breakdown of which C++ files are patched and why. Everything is driven by preferences - no hardcoded values in the binary. You change one pref, you change the spoofed value. @@ -63,23 +33,21 @@ Everything is driven by preferences - no hardcoded values in the binary. You cha ## How it compares -Commercial anti-detect browsers (Multilogin Mimic, GoLogin Orbita, AdsPower, Dolphin Anty) ship patched Chromium and apply most spoofing at the JavaScript layer. A few (Kameleo, Multilogin Stealthfox) also offer Firefox-based profiles, but the spoofing pattern is the same: runtime overrides on top of an unmodified rendering engine. That's the ceiling - and it's a low one. +**CloakBrowser** ships a similar pitch for Chromium, but its binary is **closed source** (the source-level patches are not published, you only get the compiled output), and it still hits the Chromium reCAPTCHA ceiling. The commercial anti-detect browsers (**Multilogin**, **GoLogin**, AdsPower, Dolphin, Kameleo) are paid SaaS that overlay JS-layer spoofing on a patched Chromium. Managed profiles are nice but raw detection bypass sits below both Camoufox and us. -| | invisible_playwright | Multilogin / GoLogin | AdsPower / Dolphin | Kameleo | +| | invisible_playwright | Camoufox | CloakBrowser | Multilogin | |---|---|---|---|---| -| Engine | Firefox (open source) | Chromium fork | Chromium fork | Chromium | -| Patch depth | C++ source | JS overrides | JS overrides | JS overrides | -| `.toString()` clean | ✅ Native Gecko path | ❌ Detectable shims | ❌ Detectable shims | ❌ Detectable shims | -| Canvas / WebGL | ✅ C++ level | ⚠️ JS override | ⚠️ JS override | ⚠️ JS override | -| SOCKS5 auth | ✅ Patched | ⚠️ Varies | ⚠️ Varies | ❌ | -| Self-hosted | ✅ | ❌ SaaS | ❌ SaaS | ❌ Cloud | -| reCAPTCHA v3 score | **0.90** | ~0.3-0.6 | ~0.3-0.5 | ~0.3-0.5 | -| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected | -| FP Pro - tampering | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected | -| FP Pro - VPN flag | ✅ false | ❌ true | ❌ true | ❌ true | -| CreepJS lies | ✅ 0 | ❌ multiple | ❌ multiple | ❌ multiple | - -Competitor scores reflect our own testing on Windows 10 against the same five detection suites used above; results may vary with their evolving builds. +| Engine | Firefox 150 | Firefox (~1 year old base) | Chromium | Chromium fork | +| Patch depth | C++ source | C++ source | C++ source | JS overrides | +| Maintenance | Active | Gap (~1 year) | Active | Active SaaS | +| Open source | ✅ MIT | ✅ MPL | ❌ Closed source | ❌ Closed source | +| `.toString()` clean | ✅ | ✅ | ✅ | ❌ Detectable shims | +| Canvas / WebGL / Audio | ✅ C++ | ⚠️ Drift vs current FF | ✅ C++ | ⚠️ JS override | +| SOCKS5 auth | ✅ Patched | ❌ | ⚠️ Playwright proxy | ⚠️ Varies | +| **reCAPTCHA v3 score** | **0.90** | ~0.3-0.5 | ~0.3-0.5 | ~0.3-0.6 | +| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected | +| CreepJS lies | ✅ 0 | ❌ Multiple | ✅ 0 | ❌ Multiple | +| Cost | Free | Free | Free | From $99/mo | --- @@ -172,6 +140,21 @@ with InvisiblePlaywright(proxy=proxy) as browser: Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak. +### Timezone + +The browser timezone follows `timezone=`: + +```python +# default: timezone is auto-derived from the egress IP (proxy egress if a +# proxy is set, otherwise the host's own public IP) +with InvisiblePlaywright(proxy=proxy) as browser: + ... + +# explicit IANA zone always wins — the only way to force a specific zone +with InvisiblePlaywright(proxy=proxy, timezone="America/New_York") as browser: + ... +``` + ### Pinning specific fingerprint fields By default everything comes from `seed`. To force specific values while the rest stays seed-derived: @@ -215,4 +198,4 @@ invisible_playwright takes a different angle than the major Firefox-hardening pr ## License -MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox). +MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox). diff --git a/SECURITY.md b/SECURITY.md index 19dbc11..83959a2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -41,7 +41,7 @@ In scope: Out of scope here (report to the relevant project): -- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox/security/advisories/new) +- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox/security/advisories/new) - Vulnerabilities in upstream Firefox / mozilla-central — report to Mozilla per https://www.mozilla.org/security/ - Vulnerabilities in third-party dependencies (`playwright`, `requests`, etc.) — report to those projects directly diff --git a/docs/screenshots/hero.gif b/docs/screenshots/hero.gif new file mode 100644 index 0000000..eadbf1b Binary files /dev/null and b/docs/screenshots/hero.gif differ diff --git a/pyproject.toml b/pyproject.toml index 02f4cfc..4bf9262 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "invisible-playwright" -version = "0.1.5" +version = "0.2.0" description = "Playwright wrapper for a patched Firefox with deterministic stealth profile." readme = "README.md" requires-python = ">=3.11" @@ -22,13 +22,15 @@ classifiers = [ dependencies = [ "playwright>=1.40", "platformdirs>=4", - "requests>=2.31", + "requests[socks]>=2.31", + "maxminddb>=2.2", + "tzdata>=2024.1", "tqdm>=4.66", "pywin32>=306; sys_platform == 'win32'", ] [project.optional-dependencies] -dev = ["pytest>=7", "pytest-mock>=3", "responses>=0.24", "build>=1"] +dev = ["pytest>=7", "pytest-mock>=3", "responses>=0.24", "build>=1", "pytest-rerunfailures>=14", "playwright>=1.40"] [tool.pytest.ini_options] markers = [ @@ -39,6 +41,12 @@ markers = [ "linux_only: tests that require Linux platform", ] addopts = "-m 'not slow and not e2e'" +# tests/playwright-upstream/ is a vendored Microsoft Playwright test suite +# used for compatibility verification on demand. It has its own deps +# (pixelmatch with API not matching our version) and a conftest that fails +# collection in our env. Run it explicitly with --override-ini for compat +# audits, not on every push. +norecursedirs = ["playwright-upstream"] [project.scripts] invisible-playwright = "invisible_playwright.cli:main" diff --git a/scripts/ci_drive_gate.py b/scripts/ci_drive_gate.py new file mode 100644 index 0000000..2b6ebf0 --- /dev/null +++ b/scripts/ci_drive_gate.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +"""CI drive gate — the firefox-N catcher. + +A raw `firefox --screenshot` proves nothing about automation: a juggler-less +binary renders a screenshot just fine and ships broken (firefox-8 did exactly +that). This DRIVES the binary the way users will — Playwright launches it over +the juggler pipe and exercises real paths. + +Two levels (see `--full`): + + SMOKE (default — run on ALL 5 legs, on every binary's native runner): + launch over juggler-pipe → navigate a real http://127.0.0.1 page → assert a + response, the Firefox UA, navigator.webdriver falsy, and a DOM read. This is + the firefox-8 catcher (a juggler-less binary throws TargetClosedError on + launch) plus a base stealth + drivability check. It is intentionally LIGHT: + the free hosted runners — windows-latest especially — are content-process + unstable under a heavy headless interaction sequence (clicks/moves cascade + into "context destroyed" / selector-timeout / eval-CSP), so the gate that + must be GREEN on every leg stays minimal and reliable. + + FULL (`--full` — run on the historically-reliable Linux leg): + SMOKE plus mouse + keyboard input (firefox-2 / issue #9: + jugglerSendMouseEvent/synthesizeMouseEvent), canvas determinism (stealth + seed must be per-session), and navigator-surface tells. The interaction code + is platform-identical JS (it lives in omni.ja), so exercising it on one + reliable leg catches a regression for ALL platforms; win interaction is + additionally covered by local pre-release testing. + +NOT covered here: WebGL determinism (needs SWGL, false-fails headless) and the +faithful cross-origin iframe test (issue #20) — both live in the local realness +gate. All checks here are headless, no screenshot (GPU-free), loopback-only +(no external network / proxy / secrets) → safe in public CI. + +Robustness: a real loopback HTTP page (NOT data: / about:blank — those get +re-normalized / carry an eval-blocking CSP), arrow-function evaluates (never +eval'd), and up to 2 retries on transient context-destroyed/detached/timeout. +A genuinely broken binary fails ALL attempts → the gate fails. + +Usage: python ci_drive_gate.py [--full] +Exit 0 + "DRIVE GATE OK ..." on success; non-zero with a reason on failure. +""" +from __future__ import annotations + +import http.server +import socketserver +import sys +import threading + +HTML = ( + "dt" + "

hello-drive

" + "" + "" + "" + "" +).encode() + +CANVAS_DRAW = ( + "() => {const c=document.createElement('canvas');c.width=c.height=16;" + "const g=c.getContext('2d');g.fillStyle='#08f';g.fillRect(0,0,16,16);" + "g.fillStyle='#f40';g.fillText('s',2,12);return c.toDataURL();}" +) + +_TRANSIENT = ("context was destroyed", "frame was detached", "target closed", + "because of a navigation", "timeout", "blocked by csp") + + +class _Handler(http.server.BaseHTTPRequestHandler): + def do_GET(self): # noqa: N802 + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(HTML))) + self.end_headers() + self.wfile.write(HTML) + + def log_message(self, *a): # silence per-request stderr noise + pass + + +def _start_server(): + srv = socketserver.TCPServer(("127.0.0.1", 0), _Handler) + threading.Thread(target=srv.serve_forever, daemon=True).start() + return srv, srv.server_address[1] + + +def _drive(exe: str, url: str, full: bool) -> str: + """One full drive attempt. Returns the UA on success; raises on failure.""" + from playwright.sync_api import sync_playwright + + with sync_playwright() as p: + browser = p.firefox.launch(executable_path=exe, headless=True) + try: + page = browser.new_page() + resp = page.goto(url, wait_until="load") + assert resp and resp.ok, f"navigation to {url} failed: {resp.status if resp else 'no response'}" + ua = page.evaluate("() => navigator.userAgent") + webdriver = page.evaluate("() => navigator.webdriver") + text = page.evaluate("() => document.getElementById('x').textContent") + + inter = {} + if full: + # firefox-2 / issue-#9 catcher: real mouse + keyboard over juggler. + page.wait_for_selector("#b") + page.mouse.move(20, 20) + page.mouse.move(120, 90) # synthesizeMouseEvent path + page.click("#b") # mousedown/up/click → listener fires + page.click("#inp") + page.keyboard.type("ok") + inter["clicked"] = page.evaluate("() => window.__clicked") + inter["moves"] = page.evaluate("() => window.__moves") + inter["typed"] = page.evaluate("() => document.getElementById('inp').value") + inter["canvas_a"] = page.evaluate(CANVAS_DRAW) + inter["canvas_b"] = page.evaluate(CANVAS_DRAW) + inter["langs"] = page.evaluate("() => navigator.languages.length") + inter["plugins"] = page.evaluate("() => navigator.plugins instanceof PluginArray") + finally: + browser.close() + + # SMOKE asserts (always). + assert "Firefox" in ua, f"unexpected UA (binary not driving correctly): {ua!r}" + assert text == "hello-drive", f"DOM/JS roundtrip failed: {text!r}" + assert not webdriver, f"navigator.webdriver leaked True (stealth regression): {webdriver!r}" + + if full: + assert inter["clicked"] == 1, "page.click() did not fire the click listener — mouse-event synthesis broken (firefox-2 class)" + assert inter["moves"] >= 1, "page.mouse.move() produced no mousemove — jugglerSendMouseEvent regression" + assert inter["typed"] == "ok", f"page.keyboard.type() failed: {inter['typed']!r}" + assert inter["canvas_a"] == inter["canvas_b"], "canvas non-deterministic across identical draws (stealth seed broken → bot tell)" + assert inter["langs"] and inter["langs"] > 0, "navigator.languages empty (headless tell)" + assert inter["plugins"], "navigator.plugins is not a PluginArray (headless tell)" + return ua + + +def main(exe: str, full: bool) -> int: + srv, port = _start_server() + url = f"http://127.0.0.1:{port}/" + level = "full" if full else "smoke" + extras = "http+click+mousemove+keyboard+canvas-determinism+navsurface" if full else "http+ua+webdriver+dom" + last = None + try: + for attempt in (1, 2, 3): + try: + ua = _drive(exe, url, full) + if attempt > 1: + print(f"(note: drive succeeded on attempt {attempt} after a transient error)") + print(f"DRIVE GATE OK [{level}] | UA={ua} | {extras}=ok") + return 0 + except Exception as e: # noqa: BLE001 — gate: any failure must surface + last = e + msg = str(e).lower() + if attempt < 3 and any(t in msg for t in _TRANSIENT): + print(f"(transient error on attempt {attempt}, retrying): {e}", file=sys.stderr) + continue + break + finally: + srv.shutdown() + print(f"DRIVE GATE FAILED [{level}]: {last}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + args = sys.argv[1:] + full = "--full" in args + positional = [a for a in args if not a.startswith("--")] + if len(positional) != 1: + print("usage: ci_drive_gate.py [--full]", file=sys.stderr) + sys.exit(2) + sys.exit(main(positional[0], full)) diff --git a/scripts/playwright_pin.txt b/scripts/playwright_pin.txt new file mode 100644 index 0000000..094d6ad --- /dev/null +++ b/scripts/playwright_pin.txt @@ -0,0 +1 @@ +1.55.0 diff --git a/scripts/run_e2e.py b/scripts/run_e2e.py new file mode 100644 index 0000000..bec1c7d --- /dev/null +++ b/scripts/run_e2e.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +"""Run the FULL e2e suite (every test that opens the browser) against a binary. + +The 127 ``@pytest.mark.e2e`` tests are excluded from the default `pytest` run +(`addopts = -m 'not slow and not e2e'`) because they need a real Firefox binary +and a display, and they skip themselves when no binary is available. That makes +them easy to forget — and "we can't afford for something to not work". This is +the gate that runs them all, deliberately, against a chosen binary. + +It is the MANDATORY pre-release e2e gate: run it green against the freshly-built +release binary BEFORE un-drafting a firefox-N (alongside the fppro + WebRTC +realness gates). It is NOT in the public CI drive-gate — the hosted runners are +content-process unstable under a heavy headless interaction sequence (see +70-known-bugs / 60-ci-release-pipeline); this runs locally on reliable hardware. + +Flake-resilience: under full-suite load a couple of interaction tests (dblclick, +hover/mouseenter) can flake even though they pass 3/3 in isolation, so failures +are reran up to twice on the known transient signatures. A genuinely broken +binary fails all attempts. The webrtc e2e fake a TCP-only SOCKS locally (no +proxy/secrets), so the whole suite is offline. + +Usage: + python scripts/run_e2e.py + python scripts/run_e2e.py # uses $INVPW_BINARY_PATH +""" +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +_RERUN_SIGNATURES = "Timeout|context was destroyed|was detached|not visible|because of a navigation|TargetClosed" + + +def main() -> int: + binary = sys.argv[1] if len(sys.argv) > 1 else os.environ.get("INVPW_BINARY_PATH") + if not binary: + print("usage: run_e2e.py (or set INVPW_BINARY_PATH)", file=sys.stderr) + return 2 + if not Path(binary).exists(): + print(f"ERROR: binary not found: {binary}", file=sys.stderr) + return 2 + + env = dict(os.environ) + # One setting drives the whole suite: conftest's firefox_binary fixture and + # the webrtc e2e both resolve from these. + env["INVPW_BINARY_PATH"] = binary + env["STEALTHFOX_E2E_BINARY"] = binary + + repo = Path(__file__).resolve().parent.parent + cmd = [ + sys.executable, "-m", "pytest", + "-m", "e2e", + "-o", "addopts=", # override the default 'not e2e' deselection + "--reruns", "2", "--reruns-delay", "1", + "--only-rerun", _RERUN_SIGNATURES, + "-p", "no:cacheprovider", + "-q", "--tb=short", + ] + sys.argv[2:] + print(f"[run_e2e] binary={binary}") + print(f"[run_e2e] {' '.join(cmd)}") + return subprocess.run(cmd, cwd=repo, env=env).returncode + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/invisible_playwright/__init__.py b/src/invisible_playwright/__init__.py index 6bae9f3..0871021 100644 --- a/src/invisible_playwright/__init__.py +++ b/src/invisible_playwright/__init__.py @@ -15,8 +15,30 @@ Quickstart: page = browser.new_page() page.click("#submit") # expanded into a Bezier trajectory """ -from .launcher import InvisiblePlaywright +from .config import get_default_args, get_default_stealth_prefs from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION +from ._geo import GeoTimezoneError, resolve_session_timezone +from .download import ensure_binary, ensure_geoip_mmdb +from .launcher import InvisiblePlaywright -__version__ = "0.1.0" -__all__ = ["InvisiblePlaywright", "BINARY_VERSION", "FIREFOX_UPSTREAM_VERSION", "__version__"] +from importlib.metadata import PackageNotFoundError, version as _pkg_version + +try: + __version__ = _pkg_version("invisible-playwright") +except PackageNotFoundError: + # Editable / source checkout without an install record: fall back to a + # marker rather than risk shipping a stale hardcoded string. + __version__ = "0.0.0+unknown" + +__all__ = [ + "InvisiblePlaywright", + "ensure_binary", + "ensure_geoip_mmdb", + "get_default_stealth_prefs", + "get_default_args", + "resolve_session_timezone", + "GeoTimezoneError", + "BINARY_VERSION", + "FIREFOX_UPSTREAM_VERSION", + "__version__", +] diff --git a/src/invisible_playwright/_fpforge/_sampler.py b/src/invisible_playwright/_fpforge/_sampler.py index 5653db8..692f600 100644 --- a/src/invisible_playwright/_fpforge/_sampler.py +++ b/src/invisible_playwright/_fpforge/_sampler.py @@ -84,6 +84,12 @@ _FONT_POOL = _load("font_pool.json") _FONT_CORE: list = _FONT_POOL["core"] _FONT_OPTIONAL: list = _FONT_POOL["optional"] _CPT_FONTS_OPT = _load("cpt_fonts_optional_given_class.json")["table"] +# Browsing-history pool + CPT (per-class probabilities for visited sites). +# Drives _recaptcha_seed's cookie pre-seed: each persona ends up with a +# coherent list of ~15-30 visited sites whose categories correlate with +# gpu_class (workstation → dev-heavy, integrated_old → shop+news-heavy). +_BROWSING_POOL: list = _load("browsing_pool.json")["entries"] +_CPT_BROWSING = _load("cpt_browsing_given_class.json")["table"] # ═══════════════════════════════════════════════════════════════════════ @@ -282,6 +288,33 @@ def derive_font_whitelist(gpu_class: str, rng) -> str: return derive_font_prefs(gpu_class, rng)["whitelist"] +# ═══════════════════════════════════════════════════════════════════════ +# BROWSING HISTORY (Bayesian: per-site P(visited|gpu_class)) +# ═══════════════════════════════════════════════════════════════════════ +def derive_browsing_history(gpu_class: str, rng) -> list: + """Sample which sites this persona has visited recently. + + Each site in the pool has a per-class probability (CPT). We sample + independently per-site, producing a list of dicts: + [{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, ...] + + Sum of CPT probabilities per class is tuned to land ~15-30 visited sites + on average — an established-user signature. Sorted by name for stable + output across runs of the same seed. + """ + cpt = _CPT_BROWSING.get(gpu_class) + if cpt is None: + cpt = _CPT_BROWSING["mid_range"] + visited: list = [] + for entry in _BROWSING_POOL: + name = entry["name"] + p = cpt.get(name, 0.3) # default 0.3 for missing CPT row + if rng.random() < p: + visited.append(dict(entry)) # copy to avoid mutating pool + visited.sort(key=lambda e: e["name"]) + return visited + + # ═══════════════════════════════════════════════════════════════════════ # PUBLIC API: Forge # ═══════════════════════════════════════════════════════════════════════ @@ -350,6 +383,12 @@ class Forge: bundle["gpu_class"], self._rng ).items() }, + # Bayesian browsing history (per-class P(visited|gpu_class)). + # Consumed by _recaptcha_seed.py to seed coherent cookie history + # when invisible_playwright is launched with prep_recaptcha=True. + "browsing_history": derive_browsing_history( + bundle["gpu_class"], self._rng + ), } diff --git a/src/invisible_playwright/_fpforge/data/browsing_pool.json b/src/invisible_playwright/_fpforge/data/browsing_pool.json new file mode 100644 index 0000000..6e98cd9 --- /dev/null +++ b/src/invisible_playwright/_fpforge/data/browsing_pool.json @@ -0,0 +1,64 @@ +{ + "_comment": [ + "Pool of everyday websites used by the browsing_history node.", + "Each entry: { name, category, cookie_profile }.", + "- name: bare domain (no scheme, no leading dot).", + "- category: dev / shop / news / reference / media / community / misc.", + "- cookie_profile: short tag pointing to a cookie-template recipe used by", + " _recaptcha_seed.py to generate concrete cookies (so heavy-analytics sites", + " get _ga+_gid+OneTrust, simple sites get just _ga, dev tools get GH-style).", + "Add new entries here + add per-class probabilities in cpt_browsing_given_class.json." + ], + "entries": [ + {"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"}, + {"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"}, + {"name": "mozilla.org", "category": "reference", "cookie_profile": "ga_consent"}, + {"name": "w3schools.com", "category": "dev", "cookie_profile": "ga_consent_clarity"}, + {"name": "mdn.io", "category": "dev", "cookie_profile": "minimal"}, + {"name": "duckduckgo.com", "category": "reference", "cookie_profile": "minimal"}, + {"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, + {"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"}, + {"name": "npmjs.com", "category": "dev", "cookie_profile": "ga_consent"}, + {"name": "gitlab.com", "category": "dev", "cookie_profile": "ga_cf"}, + {"name": "pypi.org", "category": "dev", "cookie_profile": "minimal"}, + {"name": "docs.python.org", "category": "dev", "cookie_profile": "minimal"}, + {"name": "rust-lang.org", "category": "dev", "cookie_profile": "ga_consent"}, + {"name": "go.dev", "category": "dev", "cookie_profile": "ga_consent"}, + {"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "ebay.com", "category": "shop", "cookie_profile": "ga_consent"}, + {"name": "etsy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "bestbuy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "target.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "nytimes.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, + {"name": "cnn.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "bbc.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "theguardian.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, + {"name": "reuters.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "apnews.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "washingtonpost.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "techcrunch.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, + {"name": "theverge.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "arstechnica.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "wired.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, + {"name": "engadget.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "9to5mac.com", "category": "news", "cookie_profile": "ga_consent"}, + {"name": "medium.com", "category": "community", "cookie_profile": "ga_consent"}, + {"name": "dev.to", "category": "community", "cookie_profile": "ga_consent"}, + {"name": "reddit.com", "category": "community", "cookie_profile": "ga_cf"}, + {"name": "news.ycombinator.com", "category": "community", "cookie_profile": "minimal"}, + {"name": "quora.com", "category": "community", "cookie_profile": "ga_consent_clarity"}, + {"name": "stackexchange.com", "category": "community", "cookie_profile": "ga_consent_clarity"}, + {"name": "imdb.com", "category": "media", "cookie_profile": "ga_consent_clarity"}, + {"name": "rottentomatoes.com", "category": "media", "cookie_profile": "ga_consent"}, + {"name": "metacritic.com", "category": "media", "cookie_profile": "ga_consent"}, + {"name": "allrecipes.com", "category": "misc", "cookie_profile": "ga_consent_clarity"}, + {"name": "epicurious.com", "category": "misc", "cookie_profile": "ga_consent"}, + {"name": "tripadvisor.com", "category": "misc", "cookie_profile": "ga_consent_clarity"}, + {"name": "weather.com", "category": "reference", "cookie_profile": "ga_consent"}, + {"name": "timeanddate.com", "category": "reference", "cookie_profile": "ga_consent"}, + {"name": "thesaurus.com", "category": "reference", "cookie_profile": "ga_consent_clarity"}, + {"name": "kayak.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "booking.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "airbnb.com", "category": "shop", "cookie_profile": "ga_consent"} + ] +} diff --git a/src/invisible_playwright/_fpforge/data/cpt_browsing_given_class.json b/src/invisible_playwright/_fpforge/data/cpt_browsing_given_class.json new file mode 100644 index 0000000..b2e3b1a --- /dev/null +++ b/src/invisible_playwright/_fpforge/data/cpt_browsing_given_class.json @@ -0,0 +1,138 @@ +{ + "_comment": [ + "Per-class probability that a persona of a given gpu_class has visited each", + "site in the pool. Used by the browsing_history node to derive a coherent", + "visited-domain list per persona.", + "", + "Probabilities are tuned so each class samples ~15-30 sites on average", + "(sum across all 50 entries falls in that range), giving an established-user", + "look. Categories are biased by class:", + " - workstation/high_end: higher P(dev) + high P(news/media)", + " - mid_range: balanced", + " - low_end/integrated_*: lower P(dev), higher P(shop/news/reference)", + "", + "Missing class falls back to mid_range via Node CPT pool fallback." + ], + "table": { + "workstation": { + "youtube.com": 0.80, "wikipedia.org": 0.85, "mozilla.org": 0.70, + "w3schools.com": 0.40, "mdn.io": 0.55, "duckduckgo.com": 0.45, + "github.com": 0.95, "stackoverflow.com": 0.90, "npmjs.com": 0.65, + "gitlab.com": 0.50, "pypi.org": 0.55, "docs.python.org": 0.60, + "rust-lang.org": 0.35, "go.dev": 0.30, + "amazon.com": 0.70, "ebay.com": 0.25, "etsy.com": 0.15, + "bestbuy.com": 0.45, "target.com": 0.30, + "nytimes.com": 0.55, "cnn.com": 0.40, "bbc.com": 0.55, + "theguardian.com": 0.45, "reuters.com": 0.40, "apnews.com": 0.30, + "washingtonpost.com": 0.40, + "techcrunch.com": 0.65, "theverge.com": 0.60, "arstechnica.com": 0.65, + "wired.com": 0.50, "engadget.com": 0.35, "9to5mac.com": 0.30, + "medium.com": 0.55, "dev.to": 0.40, "reddit.com": 0.70, + "news.ycombinator.com": 0.65, "quora.com": 0.20, "stackexchange.com": 0.60, + "imdb.com": 0.45, "rottentomatoes.com": 0.25, "metacritic.com": 0.20, + "allrecipes.com": 0.20, "epicurious.com": 0.15, "tripadvisor.com": 0.30, + "weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25, + "kayak.com": 0.30, "booking.com": 0.35, "airbnb.com": 0.30 + }, + "high_end": { + "youtube.com": 0.85, "wikipedia.org": 0.80, "mozilla.org": 0.60, + "w3schools.com": 0.45, "mdn.io": 0.45, "duckduckgo.com": 0.40, + "github.com": 0.85, "stackoverflow.com": 0.80, "npmjs.com": 0.50, + "gitlab.com": 0.40, "pypi.org": 0.45, "docs.python.org": 0.50, + "rust-lang.org": 0.30, "go.dev": 0.25, + "amazon.com": 0.75, "ebay.com": 0.30, "etsy.com": 0.20, + "bestbuy.com": 0.50, "target.com": 0.35, + "nytimes.com": 0.50, "cnn.com": 0.50, "bbc.com": 0.50, + "theguardian.com": 0.40, "reuters.com": 0.35, "apnews.com": 0.30, + "washingtonpost.com": 0.35, + "techcrunch.com": 0.60, "theverge.com": 0.65, "arstechnica.com": 0.60, + "wired.com": 0.50, "engadget.com": 0.40, "9to5mac.com": 0.35, + "medium.com": 0.50, "dev.to": 0.35, "reddit.com": 0.75, + "news.ycombinator.com": 0.55, "quora.com": 0.25, "stackexchange.com": 0.55, + "imdb.com": 0.55, "rottentomatoes.com": 0.35, "metacritic.com": 0.30, + "allrecipes.com": 0.25, "epicurious.com": 0.20, "tripadvisor.com": 0.30, + "weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25, + "kayak.com": 0.30, "booking.com": 0.40, "airbnb.com": 0.30 + }, + "mid_range": { + "youtube.com": 0.85, "wikipedia.org": 0.75, "mozilla.org": 0.45, + "w3schools.com": 0.40, "mdn.io": 0.30, "duckduckgo.com": 0.35, + "github.com": 0.55, "stackoverflow.com": 0.55, "npmjs.com": 0.30, + "gitlab.com": 0.25, "pypi.org": 0.25, "docs.python.org": 0.30, + "rust-lang.org": 0.15, "go.dev": 0.15, + "amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30, + "bestbuy.com": 0.55, "target.com": 0.40, + "nytimes.com": 0.45, "cnn.com": 0.55, "bbc.com": 0.45, + "theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30, + "washingtonpost.com": 0.30, + "techcrunch.com": 0.45, "theverge.com": 0.50, "arstechnica.com": 0.40, + "wired.com": 0.45, "engadget.com": 0.35, "9to5mac.com": 0.30, + "medium.com": 0.45, "dev.to": 0.25, "reddit.com": 0.70, + "news.ycombinator.com": 0.30, "quora.com": 0.35, "stackexchange.com": 0.40, + "imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.35, + "allrecipes.com": 0.35, "epicurious.com": 0.25, "tripadvisor.com": 0.40, + "weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30, + "kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40 + }, + "low_end": { + "youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.35, + "w3schools.com": 0.30, "mdn.io": 0.20, "duckduckgo.com": 0.30, + "github.com": 0.30, "stackoverflow.com": 0.30, "npmjs.com": 0.15, + "gitlab.com": 0.10, "pypi.org": 0.10, "docs.python.org": 0.15, + "rust-lang.org": 0.05, "go.dev": 0.05, + "amazon.com": 0.85, "ebay.com": 0.50, "etsy.com": 0.40, + "bestbuy.com": 0.55, "target.com": 0.45, + "nytimes.com": 0.40, "cnn.com": 0.60, "bbc.com": 0.40, + "theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.30, + "washingtonpost.com": 0.25, + "techcrunch.com": 0.30, "theverge.com": 0.35, "arstechnica.com": 0.25, + "wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25, + "medium.com": 0.35, "dev.to": 0.15, "reddit.com": 0.65, + "news.ycombinator.com": 0.15, "quora.com": 0.45, "stackexchange.com": 0.25, + "imdb.com": 0.65, "rottentomatoes.com": 0.45, "metacritic.com": 0.35, + "allrecipes.com": 0.45, "epicurious.com": 0.30, "tripadvisor.com": 0.45, + "weather.com": 0.65, "timeanddate.com": 0.25, "thesaurus.com": 0.35, + "kayak.com": 0.35, "booking.com": 0.50, "airbnb.com": 0.40 + }, + "integrated_modern": { + "youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.40, + "w3schools.com": 0.35, "mdn.io": 0.25, "duckduckgo.com": 0.35, + "github.com": 0.40, "stackoverflow.com": 0.40, "npmjs.com": 0.20, + "gitlab.com": 0.15, "pypi.org": 0.20, "docs.python.org": 0.20, + "rust-lang.org": 0.10, "go.dev": 0.10, + "amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30, + "bestbuy.com": 0.50, "target.com": 0.40, + "nytimes.com": 0.40, "cnn.com": 0.55, "bbc.com": 0.45, + "theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30, + "washingtonpost.com": 0.30, + "techcrunch.com": 0.40, "theverge.com": 0.45, "arstechnica.com": 0.30, + "wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25, + "medium.com": 0.40, "dev.to": 0.20, "reddit.com": 0.65, + "news.ycombinator.com": 0.25, "quora.com": 0.40, "stackexchange.com": 0.35, + "imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.30, + "allrecipes.com": 0.40, "epicurious.com": 0.25, "tripadvisor.com": 0.40, + "weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30, + "kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40 + }, + "integrated_old": { + "youtube.com": 0.75, "wikipedia.org": 0.65, "mozilla.org": 0.30, + "w3schools.com": 0.20, "mdn.io": 0.10, "duckduckgo.com": 0.25, + "github.com": 0.15, "stackoverflow.com": 0.20, "npmjs.com": 0.05, + "gitlab.com": 0.05, "pypi.org": 0.05, "docs.python.org": 0.10, + "rust-lang.org": 0.02, "go.dev": 0.02, + "amazon.com": 0.85, "ebay.com": 0.55, "etsy.com": 0.45, + "bestbuy.com": 0.55, "target.com": 0.50, + "nytimes.com": 0.45, "cnn.com": 0.65, "bbc.com": 0.40, + "theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.35, + "washingtonpost.com": 0.30, + "techcrunch.com": 0.20, "theverge.com": 0.25, "arstechnica.com": 0.15, + "wired.com": 0.30, "engadget.com": 0.20, "9to5mac.com": 0.20, + "medium.com": 0.30, "dev.to": 0.05, "reddit.com": 0.55, + "news.ycombinator.com": 0.05, "quora.com": 0.55, "stackexchange.com": 0.15, + "imdb.com": 0.70, "rottentomatoes.com": 0.50, "metacritic.com": 0.35, + "allrecipes.com": 0.55, "epicurious.com": 0.35, "tripadvisor.com": 0.50, + "weather.com": 0.70, "timeanddate.com": 0.30, "thesaurus.com": 0.40, + "kayak.com": 0.40, "booking.com": 0.55, "airbnb.com": 0.40 + } + } +} diff --git a/src/invisible_playwright/_fpforge/profile.py b/src/invisible_playwright/_fpforge/profile.py index 16c52a4..fcdf024 100644 --- a/src/invisible_playwright/_fpforge/profile.py +++ b/src/invisible_playwright/_fpforge/profile.py @@ -120,6 +120,11 @@ class Profile: webgl: WebGLProfile fonts: List[str] dark_theme: bool + # Bayesian browsing-history: list of {name, category, cookie_profile} + # dicts sampled from data/browsing_pool.json with per-class CPT. Used + # by _recaptcha_seed.py to build a coherent cookie pre-seed when the + # caller opts in via Stealthfox(prep_recaptcha=True). + browsing_history: List[Dict[str, str]] = field(default_factory=list) _raw: Dict[str, Any] = field(default_factory=dict, repr=False, compare=False) def to_prefs_dict(self) -> Dict[str, Any]: @@ -255,5 +260,6 @@ def generate_profile(seed: int, pin: Optional[Dict[str, Any]] = None) -> Profile webgl=WebGLProfile(msaa_samples=int(raw["msaa_samples"])), fonts=fonts, dark_theme=bool(raw["dark_theme"]), + browsing_history=list(raw.get("browsing_history") or []), _raw=raw, ) diff --git a/src/invisible_playwright/_geo.py b/src/invisible_playwright/_geo.py new file mode 100644 index 0000000..02971e1 --- /dev/null +++ b/src/invisible_playwright/_geo.py @@ -0,0 +1,164 @@ +"""Resolve the session timezone from the egress IP (``timezone="auto"``). + +Approach B: discover the egress IP with one HTTP request — routed *through the +proxy* when one is set, otherwise a direct request that sees the host's own +public IP — then map IP → IANA timezone with an offline mmdb +(``daijro/geoip-all-in-one``, downloaded + cached by ``download.py``). + +Precedence (see ``resolve_session_timezone``): + + explicit IANA → unchanged explicit always wins + "" / "auto" → egress ALWAYS resolve. With a proxy, from the proxy + egress IP; without a proxy, from the host's + own public IP. This is the default. + +On failure: + with a proxy → raise a foreign proxy paired with the host TZ is + the precise ``timezone_mismatch`` signal, so + we fail loudly rather than fall back silently. + without a proxy → "" (host) the host TZ is a safe default, so a transient + lookup failure must not break the launch. +""" +from __future__ import annotations + +import ipaddress +from typing import Any, Dict, Optional +from urllib.parse import quote + +import requests + + +class GeoTimezoneError(RuntimeError): + """Raised when ``timezone="auto"`` cannot resolve a valid IANA zone.""" + + +# Plain-text IP echo endpoints (each returns just the caller's public IP). +_IP_ECHO_ENDPOINTS = ( + "https://api.ipify.org", + "https://icanhazip.com", + "https://checkip.amazonaws.com", +) + +_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://") + + +def _proxy_is_set(proxy: Optional[Dict[str, str]]) -> bool: + if not proxy: + return False + server = (proxy.get("server") or "").strip() + return bool(server) and server.lower() != "direct://" + + +def _proxies_for_requests(proxy: Dict[str, str]) -> Dict[str, str]: + """Translate our proxy dict into a ``requests`` proxies mapping. + + SOCKS5 uses the ``socks5h`` scheme so DNS is resolved proxy-side (matches + ``network.proxy.socks_remote_dns=True`` in the Firefox path). HTTP/HTTPS + pass through unchanged. Credentials are URL-encoded. + """ + server = (proxy.get("server") or "").strip() + low = server.lower() + if low.startswith("socks5://") or low.startswith("socks://"): + scheme = "socks5h" + elif low.startswith("socks4://"): + scheme = "socks4" + elif low.startswith("https://"): + scheme = "https" + else: + scheme = "http" + + host_port = server.split("://", 1)[1] if "://" in server else server + user = proxy.get("username") or "" + pwd = proxy.get("password") or "" + if user: + auth = f"{quote(user, safe='')}:{quote(pwd, safe='')}@" + else: + auth = "" + url = f"{scheme}://{auth}{host_port}" + return {"http": url, "https": url} + + +def discover_egress_ip( + proxy: Optional[Dict[str, str]] = None, *, timeout: float = 10.0 +) -> str: + """Return the public egress IP. + + Routes the request through ``proxy`` when given (SOCKS support requires + ``requests[socks]`` / PySocks); with ``proxy=None`` it makes a direct + request that sees the host's own public IP. Tries each echo endpoint in + turn; raises :class:`GeoTimezoneError` if none return a valid IP. + """ + proxies = _proxies_for_requests(proxy) if proxy else None + last_err: Optional[Exception] = None + for url in _IP_ECHO_ENDPOINTS: + try: + resp = requests.get(url, proxies=proxies, timeout=timeout) + resp.raise_for_status() + ip = resp.text.strip() + ipaddress.ip_address(ip) # validate (raises ValueError if not an IP) + return ip + except Exception as exc: # noqa: BLE001 - try the next endpoint + last_err = exc + continue + raise GeoTimezoneError( + f"could not discover the proxy egress IP via {len(_IP_ECHO_ENDPOINTS)} " + f"endpoints (last error: {last_err!r}). For SOCKS proxies make sure " + f"requests[socks] / PySocks is installed." + ) + + +def ip_to_timezone(ip: str, mmdb_path: Any) -> str: + """Map ``ip`` to its IANA timezone using the offline mmdb. + + Reads the standard MaxMind ``location.time_zone`` field and validates it + against the system tz database. Raises :class:`GeoTimezoneError` if the IP + is absent from the DB or the zone is missing / not a valid IANA name. + """ + import maxminddb + + with maxminddb.open_database(str(mmdb_path)) as reader: + record = reader.get(ip) + if not record: + raise GeoTimezoneError(f"egress IP {ip} not present in the geoip database") + tz = ((record.get("location") or {}) if isinstance(record, dict) else {}).get( + "time_zone" + ) + if not tz: + raise GeoTimezoneError(f"no timezone for egress IP {ip} in the geoip database") + from zoneinfo import ZoneInfo, ZoneInfoNotFoundError + + try: + ZoneInfo(tz) + except (ZoneInfoNotFoundError, ValueError) as exc: + raise GeoTimezoneError( + f"geoip returned an invalid IANA zone {tz!r} for {ip}: {exc}" + ) from exc + return tz + + +def resolve_session_timezone( + timezone: str, proxy: Optional[Dict[str, str]] +) -> str: + """Map the user's ``timezone`` setting to a concrete IANA zone (or ``""``). + + See the module docstring for the full precedence table. ``""``/``"auto"`` + ALWAYS resolve from the egress IP (proxy egress if a proxy is set, else the + host's own public IP). On failure: with a proxy we raise + :class:`GeoTimezoneError` (never silently use the host TZ behind a foreign + proxy); without a proxy we fall back to ``""`` (host TZ) so a transient + lookup failure can't break the launch. + """ + tz = (timezone or "").strip() + if tz and tz.lower() != "auto": + return tz # explicit IANA wins + # "" or "auto" → always resolve from the egress IP. + from .download import ensure_geoip_mmdb + + proxy_set = _proxy_is_set(proxy) + try: + ip = discover_egress_ip(proxy if proxy_set else None) + return ip_to_timezone(ip, ensure_geoip_mmdb()) + except Exception: + if proxy_set: + raise # fail-early behind a proxy (timezone_mismatch trap) + return "" # no proxy: host TZ is a safe fallback diff --git a/src/invisible_playwright/_recaptcha_seed.py b/src/invisible_playwright/_recaptcha_seed.py new file mode 100644 index 0000000..cd998a2 --- /dev/null +++ b/src/invisible_playwright/_recaptcha_seed.py @@ -0,0 +1,340 @@ +"""Deterministic reCAPTCHA cookie pre-seed. + +Consumes the Bayesian-sampled `browsing_history` from the persona Profile +(see `_fpforge/_sampler.py:derive_browsing_history`). For each visited +site, builds 1-5 realistic cookies whose composition is chosen by the +site's `cookie_profile` tag (analytics-only / consent / cloudflare-bot- +management / etc.). All values seeded deterministically from the persona +seed, so a given persona always presents the SAME cookies across sessions. + +In addition, always seeds 5 cookies on .google.com (NID, CONSENT, SOCS, +_GRECAPTCHA, ENID). Excludes 1P_JAR which was deprecated by Google in 2022 +— including it now is an anachronism flag. + +Public API: + await seed_recaptcha_cookies_async(context, profile, timezone=None) + seed_recaptcha_cookies_sync(context, profile, timezone=None) + +`profile` is an `_fpforge.Profile`; `timezone` is the IANA tz (e.g. +"Europe/Rome") used to derive the CONSENT cookie's language token, so a +European-tz persona gets CONSENT in their language not en+FX. +""" +from __future__ import annotations + +import datetime +import random +import time +from typing import Any, List, Optional + +# URL-safe base64 alphabet (no padding chars). +_B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" +_HEX_ALPHABET = "0123456789abcdef" + + +def _sub_seed(seed: int, tag: str) -> int: + """FNV-1a mix → independent PRNG streams per logical bucket from one seed.""" + h = 0xcbf29ce484222325 ^ (seed & 0xFFFFFFFF) + for c in tag.encode("ascii"): + h ^= c + h = (h * 0x100000001b3) & 0xFFFFFFFFFFFFFFFF + return h or 0xdeadbeef + + +def _b64_rand(rng: random.Random, length: int) -> str: + return "".join(rng.choice(_B64_ALPHABET) for _ in range(length)) + + +def _hex_rand(rng: random.Random, length: int) -> str: + return "".join(rng.choice(_HEX_ALPHABET) for _ in range(length)) + + +def _yyyymmdd_utc(ts: int) -> str: + return datetime.datetime.utcfromtimestamp(ts).strftime("%Y%m%d") + + +# IANA timezone -> (country_code, lang) for CONSENT cookie coherence. +# Real EU users get CONSENT with `++NNN`; non-EU gets `en+FX+NNN`. +# Default fallback `en+FX+NNN` for any tz not in this map. +_TZ_TO_REGION = { + "Europe/Rome": ("IT", "it"), + "Europe/Berlin": ("DE", "de"), + "Europe/Paris": ("FR", "fr"), + "Europe/Madrid": ("ES", "es"), + "Europe/London": ("GB", "en"), + "Europe/Amsterdam": ("NL", "nl"), + "Europe/Brussels": ("BE", "fr"), + "Europe/Vienna": ("AT", "de"), + "Europe/Zurich": ("CH", "de"), + "Europe/Dublin": ("IE", "en"), + "Europe/Lisbon": ("PT", "pt"), + "Europe/Stockholm": ("SE", "sv"), + "Europe/Oslo": ("NO", "no"), + "Europe/Copenhagen": ("DK", "da"), + "Europe/Helsinki": ("FI", "fi"), + "Europe/Warsaw": ("PL", "pl"), + "Europe/Prague": ("CZ", "cs"), + "Europe/Athens": ("GR", "el"), + "Asia/Tokyo": ("FX", "ja"), + "Asia/Shanghai": ("FX", "zh"), + "Asia/Hong_Kong": ("FX", "zh"), + "Asia/Seoul": ("FX", "ko"), +} + + +def _consent_region_lang(timezone: Optional[str]) -> tuple: + """Map IANA tz → (region_token, lang_2char) for CONSENT cookie. + Default `("FX", "en")` for US/unknown.""" + if timezone and timezone in _TZ_TO_REGION: + return _TZ_TO_REGION[timezone] + return ("FX", "en") + + +# --------------------------------------------------------------------------- +# .google.com cookie batch (always present, regardless of browsing history) +# --------------------------------------------------------------------------- + +def _google_cookies(rng: random.Random, now: int, + timezone: Optional[str] = None) -> List[dict]: + consent_age = rng.randint(60, 720) * 86400 + region, lang = _consent_region_lang(timezone) + # NID 3-digit prefix range broadened to 100-540 to cover historical NID + # versions (137, 105, 511, 525 etc. observed in real captures). + return [ + {"name": "NID", + "value": f"{rng.randint(100, 540)}={_b64_rand(rng, 178)}", + "domain": ".google.com", "path": "/", + "expires": now + 180 * 86400, + "httpOnly": True, "secure": True, "sameSite": "None"}, + {"name": "CONSENT", + "value": f"YES+cb.{_yyyymmdd_utc(now - consent_age)}-" + f"{rng.randint(10, 19):02d}-p{rng.randint(0, 9)}." + f"{lang}+{region}+{rng.randint(100, 999)}", + "domain": ".google.com", "path": "/", + "expires": now + 395 * 86400, + "secure": True, "sameSite": "Lax"}, + # 1P_JAR removed: Google deprecated it in 2022. Including it now is + # an anachronism flag for fingerprinters that look at cookie freshness. + {"name": "SOCS", + "value": f"CAES{_b64_rand(rng, 56)}", + "domain": ".google.com", "path": "/", + "expires": now + 395 * 86400, + "secure": True, "sameSite": "Lax"}, + {"name": "_GRECAPTCHA", + "value": _b64_rand(rng, 124), + "domain": ".google.com", "path": "/", + "expires": now + 180 * 86400, + "secure": True, "sameSite": "None"}, + {"name": "ENID", + "value": _b64_rand(rng, 252), + "domain": ".google.com", "path": "/", + "expires": now + 395 * 86400, + "httpOnly": True, "secure": True, "sameSite": "Lax"}, + ] + + +# --------------------------------------------------------------------------- +# Per-site cookie generators (recipes keyed by site["cookie_profile"]) +# --------------------------------------------------------------------------- + +def _norm_domain(domain: str) -> str: + return domain if domain.startswith(".") else "." + domain + + +def _ga_cookie(rng: random.Random, now: int, domain: str) -> dict: + first_age = rng.randint(7, 395) * 86400 + return {"name": "_ga", + "value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - first_age}", + "domain": domain, "path": "/", + "expires": now + 395 * 86400, + "secure": True, "sameSite": "Lax"} + + +def _gid_cookie(rng: random.Random, now: int, domain: str) -> dict: + return {"name": "_gid", + "value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - rng.randint(60, 86400)}", + "domain": domain, "path": "/", + "expires": now + 86400, + "secure": True, "sameSite": "Lax"} + + +def _cf_bm_cookie(rng: random.Random, now: int, domain: str) -> dict: + return {"name": "__cf_bm", + "value": f"{_b64_rand(rng, 43)}.{rng.randint(1700000000, now)}-1-1-1-1", + "domain": domain, "path": "/", + "expires": now + 1800, + "secure": True, "sameSite": "None"} + + +def _onetrust_cookie(rng: random.Random, now: int, domain: str) -> dict: + age_d = rng.randint(7, 365) + iso = datetime.datetime.utcfromtimestamp(now - age_d * 86400).strftime( + "%Y-%m-%dT%H:%M:%S.000Z" + ) + return {"name": "OptanonAlertBoxClosed", + "value": iso, + "domain": domain, "path": "/", + "expires": now + 395 * 86400, + "secure": True, "sameSite": "Lax"} + + +def _cookieyes_cookie(rng: random.Random, now: int, domain: str) -> dict: + return {"name": "cookieyes-consent", + "value": "consentid:" + _b64_rand(rng, 28) + + ",consent:yes,action:yes,necessary:yes,functional:yes,analytics:yes", + "domain": domain, "path": "/", + "expires": now + 395 * 86400, + "secure": True, "sameSite": "Lax"} + + +def _clarity_cookie(rng: random.Random, now: int, domain: str) -> dict: + return {"name": "_clck", + "value": f"{_hex_rand(rng, 8)}|2|f{rng.randint(10, 99)}|0|" + f"{now - rng.randint(60, 180) * 86400}", + "domain": domain, "path": "/", + "expires": now + 365 * 86400, + "secure": True, "sameSite": "Lax"} + + +def _fbp_cookie(rng: random.Random, now: int, domain: str) -> dict: + """Facebook Pixel _fbp = fb...""" + return {"name": "_fbp", + "value": f"fb.1.{(now - rng.randint(60, 30*86400)) * 1000}." + f"{rng.randint(100000000, 9999999999)}", + "domain": domain, "path": "/", + "expires": now + 90 * 86400, + "secure": True, "sameSite": "Lax"} + + +def _gtm_cookie(rng: random.Random, now: int, domain: str) -> dict: + """_dc_gtm_=1 — Google Tag Manager throttle flag.""" + container = f"UA-{rng.randint(10000000, 99999999)}-{rng.randint(1, 9)}" + return {"name": f"_dc_gtm_{container}", + "value": "1", + "domain": domain, "path": "/", + "expires": now + 60, + "secure": True, "sameSite": "Lax"} + + +def _hssrc_cookie(rng: random.Random, now: int, domain: str) -> dict: + """HubSpot referrer flag — small int.""" + return {"name": "__hssrc", + "value": str(rng.randint(1, 5)), + "domain": domain, "path": "/", + "expires": now + 1800, + "secure": True, "sameSite": "Lax"} + + +def _cookies_for_profile(profile: str, rng: random.Random, + now: int, domain: str) -> List[dict]: + """Map cookie_profile tag (from browsing_pool.json) → concrete cookies. + + Each recipe is a realistic combination observed on real production sites + in that category. Cookie age and sub-recipe variance (e.g., OneTrust vs + CookieYes for consent banner) are deterministic from rng. + """ + domain = _norm_domain(domain) + if profile == "minimal": + return [_ga_cookie(rng, now, domain)] + if profile == "ga_only": + out = [_ga_cookie(rng, now, domain), _gid_cookie(rng, now, domain)] + # 30% chance of GTM helper paired with GA + if rng.random() < 0.3: + out.append(_gtm_cookie(rng, now, domain)) + return out + if profile == "ga_cf": + return [_ga_cookie(rng, now, domain), _cf_bm_cookie(rng, now, domain)] + if profile == "ga_consent": + out = [_ga_cookie(rng, now, domain), _gid_cookie(rng, now, domain)] + out.append(_onetrust_cookie(rng, now, domain) if rng.random() < 0.5 + else _cookieyes_cookie(rng, now, domain)) + if rng.random() < 0.4: + out.append(_gtm_cookie(rng, now, domain)) + return out + if profile == "ga_consent_clarity": + # Heavy-tracking site profile: GA + Clarity + consent + often FB pixel + out = [_ga_cookie(rng, now, domain), _gid_cookie(rng, now, domain), + _clarity_cookie(rng, now, domain)] + out.append(_onetrust_cookie(rng, now, domain) if rng.random() < 0.5 + else _cookieyes_cookie(rng, now, domain)) + if rng.random() < 0.5: + out.append(_fbp_cookie(rng, now, domain)) + if rng.random() < 0.4: + out.append(_gtm_cookie(rng, now, domain)) + if rng.random() < 0.25: + out.append(_hssrc_cookie(rng, now, domain)) + return out + # Unknown profile → safe fallback + return [_ga_cookie(rng, now, domain)] + + +# --------------------------------------------------------------------------- +# Public builder +# --------------------------------------------------------------------------- + +def build_cookies(seed: int, + browsing_history: Optional[List[dict]] = None, + now: Optional[int] = None, + timezone: Optional[str] = None) -> List[dict]: + """Build the full cookie list for a persona. + + Args: + seed: persona integer seed (from `Profile.seed`) + browsing_history: list of {name, category, cookie_profile} dicts as + sampled by `_fpforge.derive_browsing_history`. None → empty list + (only the 5 google cookies are returned). + now: unix-seconds timestamp; defaults to current time. Pin for tests. + timezone: IANA tz used to derive CONSENT cookie's `lang+region` token + (e.g. "Europe/Rome" → "it+IT", "America/New_York" → "en+FX"). + """ + ts = now if now is not None else int(time.time()) + cookies: List[dict] = [] + + # 5 .google.com cookies (always) — CONSENT lang derived from tz + rng_g = random.Random(_sub_seed(int(seed), "google")) + cookies.extend(_google_cookies(rng_g, ts, timezone=timezone)) + + # Per-site cookies (deterministic from seed × domain) + for site in (browsing_history or []): + rng_d = random.Random(_sub_seed(int(seed), f"dom:{site['name']}")) + cookies.extend(_cookies_for_profile( + site.get("cookie_profile", "minimal"), rng_d, ts, site["name"] + )) + return cookies + + +def _extract_seed_and_history(profile: Any) -> tuple: + """Accept a Profile object OR a (seed, history) tuple OR just an int seed.""" + if isinstance(profile, int): + return int(profile), [] + seed = int(getattr(profile, "seed")) + history = list(getattr(profile, "browsing_history", []) or []) + return seed, history + + +async def seed_recaptcha_cookies_async(context: Any, profile: Any, + timezone: Optional[str] = None) -> None: + """Async: inject deterministic persona cookies into the context.""" + seed, history = _extract_seed_and_history(profile) + cookies = build_cookies(seed, history, timezone=timezone) + try: + await context.add_cookies(cookies) + except Exception: + pass + + +def seed_recaptcha_cookies_sync(context: Any, profile: Any, + timezone: Optional[str] = None) -> None: + """Sync: inject deterministic persona cookies into the context.""" + seed, history = _extract_seed_and_history(profile) + cookies = build_cookies(seed, history, timezone=timezone) + try: + context.add_cookies(cookies) + except Exception: + pass + + +__all__ = [ + "build_cookies", + "seed_recaptcha_cookies_async", + "seed_recaptcha_cookies_sync", +] diff --git a/src/invisible_playwright/async_api.py b/src/invisible_playwright/async_api.py index 2933c1e..70a7aeb 100644 --- a/src/invisible_playwright/async_api.py +++ b/src/invisible_playwright/async_api.py @@ -3,11 +3,13 @@ from __future__ import annotations import asyncio import secrets +from pathlib import Path from typing import Any, Dict, Optional, Union -from playwright.async_api import Browser, Playwright, async_playwright +from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright from ._fpforge import Profile, generate_profile +from ._geo import resolve_session_timezone from ._headless import make_virtual_display from ._proxy import configure_proxy as _configure_proxy_shared from .download import ensure_binary @@ -49,6 +51,8 @@ class InvisiblePlaywright: timezone: str = "", extra_prefs: Optional[Dict[str, Any]] = None, binary_path: Optional[str] = None, + profile_dir: Optional[Union[str, Path]] = None, + prep_recaptcha: bool = False, ) -> None: # See sync launcher: `zoom.stealth.fpp.hw_seed` is int32_t — clamp. self.seed: int = int(seed) if seed is not None else secrets.randbits(31) @@ -61,13 +65,24 @@ class InvisiblePlaywright: self._timezone = timezone self._extra_prefs = extra_prefs self._binary_path = binary_path + self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None + # reCAPTCHA pre-seed gated server-side; respect persistent profile. + self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None self._profile: Profile = generate_profile(self.seed, pin=self._pin) self._pw: Optional[Playwright] = None self._browser: Optional[Browser] = None + self._persistent_context: Optional[BrowserContext] = None self._virtual_display: Any = None - async def __aenter__(self) -> Browser: + async def __aenter__(self) -> Union[Browser, BrowserContext]: import sys as _sys + # Resolve timezone="auto" (and the proxy-set-but-unset default) to a + # concrete IANA zone before anything reads self._timezone. Run the + # blocking geo lookup off the event loop. Fail-early if a proxy is set + # but the egress zone can't be resolved. + self._timezone = await asyncio.to_thread( + resolve_session_timezone, self._timezone, self._proxy + ) executable = self._binary_path or ensure_binary() prefs = translate_profile_to_prefs( self._profile, @@ -85,6 +100,24 @@ class InvisiblePlaywright: env = self._build_env() try: self._pw = await async_playwright().start() + if self._profile_dir is not None: + # See sync launcher for the persistent-context rationale. + self._profile_dir.mkdir(parents=True, exist_ok=True) + # firefox-5 ships the C++ overrideTimezone IDL method (C7 + # chiusura), so locale + timezone_id now propagate cleanly + # to the persistent context without hanging the launch. + self._persistent_context = await self._pw.firefox.launch_persistent_context( + user_data_dir=str(self._profile_dir), + executable_path=str(executable), + headless=pw_headless, + firefox_user_prefs=prefs, + proxy=playwright_proxy, + args=self._extra_args, + env=env, + **self._default_context_kwargs(), + ) + _patch_new_page_sleep(self._persistent_context) + return self._persistent_context self._browser = await self._pw.firefox.launch( executable_path=str(executable), headless=pw_headless, @@ -102,12 +135,18 @@ class InvisiblePlaywright: def _patch_new_context_defaults(self, browser: Browser) -> None: original = browser.new_context defaults = self._default_context_kwargs() + prep = self._prep_recaptcha + profile = self._profile # pass the whole Profile (seed + browsing_history) + tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region async def patched(**kw): merged = dict(defaults) merged.update(kw) ctx = await original(**merged) _patch_new_page_sleep(ctx) + if prep: + from ._recaptcha_seed import seed_recaptcha_cookies_async + await seed_recaptcha_cookies_async(ctx, profile, timezone=tz) return ctx browser.new_context = patched # type: ignore[assignment] @@ -134,6 +173,12 @@ class InvisiblePlaywright: await self._teardown() async def _teardown(self) -> None: + if self._persistent_context is not None: + try: + await self._persistent_context.close() + except Exception: + pass + self._persistent_context = None if self._browser is not None: try: await self._browser.close() diff --git a/src/invisible_playwright/cli.py b/src/invisible_playwright/cli.py index bb1c687..eb12067 100644 --- a/src/invisible_playwright/cli.py +++ b/src/invisible_playwright/cli.py @@ -10,7 +10,15 @@ from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION from .download import cache_root, ensure_binary -def _cmd_fetch(_args: argparse.Namespace) -> int: +def _cmd_fetch(args: argparse.Namespace) -> int: + # --force: re-download even if already cached (drop the cached version dir, + # then let ensure_binary fetch it fresh). Useful to recover a corrupted cache + # or re-pull after a re-published release. + if getattr(args, "force", False): + from .download import cache_dir_for_version + d = cache_dir_for_version() + if d.exists(): + shutil.rmtree(d, ignore_errors=True) path = ensure_binary() print(path) return 0 @@ -44,9 +52,17 @@ def _cmd_clear_cache(_args: argparse.Namespace) -> int: def build_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser(prog="invisible-playwright", description="invisible_playwright CLI") - sub = p.add_subparsers(dest="cmd", required=True) + # Top-level `--version` / `-V` flag so `python -m invisible_playwright --version` + # works (Python convention), in addition to the existing `version` subcommand. + p.add_argument( + "-V", "--version", action="version", + version=f"invisible_playwright {__version__} (BINARY_VERSION={BINARY_VERSION}, Firefox {FIREFOX_UPSTREAM_VERSION})", + ) + sub = p.add_subparsers(dest="cmd") - sub.add_parser("fetch", help="download the patched Firefox binary") + fetch_p = sub.add_parser("fetch", help="download the patched Firefox binary") + fetch_p.add_argument("--force", action="store_true", + help="re-download even if already cached") sub.add_parser("path", help="print the absolute path to the cached binary") sub.add_parser("version", help="print wrapper and binary versions") sub.add_parser("clear-cache", help="remove all cached binaries") @@ -54,7 +70,15 @@ def build_parser() -> argparse.ArgumentParser: def main(argv: list[str] | None = None) -> int: - args = build_parser().parse_args(argv) + parser = build_parser() + args = parser.parse_args(argv) + if args.cmd is None: + # argparse-conventional: print usage + error message to stderr, exit 2. + # We can't keep `required=True` on the subparsers because that breaks + # the top-level `--version` flag (argparse demands a subcommand even + # when --version is the only token). parser.error() preserves the + # original "no subcommand" exit semantics tests expect. + parser.error("a subcommand is required (try --help, --version, or one of: fetch, path, version, clear-cache)") dispatch = { "fetch": _cmd_fetch, "path": _cmd_path, diff --git a/src/invisible_playwright/config.py b/src/invisible_playwright/config.py new file mode 100644 index 0000000..c411512 --- /dev/null +++ b/src/invisible_playwright/config.py @@ -0,0 +1,110 @@ +"""Public helpers for building Firefox launch config without using ``InvisiblePlaywright``. + +Use these when you need to call ``playwright.firefox.launch()`` (or +``firefox.launch_persistent_context()``) directly with our patched binary +and stealth prefs, instead of using the ``InvisiblePlaywright`` context +manager. + +Typical caller is an external integration that owns its own browser +lifecycle (a Crawlee/Skyvern/changedetection-style fetcher, a Playwright +Server wrapper, a multi-language harness) and just wants the building +blocks:: + + from playwright.async_api import async_playwright + from invisible_playwright import ensure_binary, get_default_stealth_prefs + + async with async_playwright() as p: + browser = await p.firefox.launch( + executable_path=str(ensure_binary()), + firefox_user_prefs=get_default_stealth_prefs(seed=42), + ) + +For everyday Python usage the ``InvisiblePlaywright`` context manager is +still the recommended entry point; these helpers expose the same internals +without the lifecycle ownership. + +.. note:: + When calling ``firefox.launch()`` yourself, pass ``headless=False`` and + manage the display hiding (Xvfb on Linux, hidden desktop on Windows) + externally. Passing ``headless=True`` directly to Playwright puts + Firefox in true headless mode, which skips the real rendering pipeline + and breaks canvas / audio / WebGL fingerprint coherence. The + ``InvisiblePlaywright`` context manager does this translation + automatically; the public helpers leave it to the caller. +""" +from __future__ import annotations + +import secrets +from typing import Any, Dict, List, Optional, Union + +from ._fpforge import generate_profile +from .prefs import translate_profile_to_prefs + + +def get_default_stealth_prefs( + seed: Optional[int] = None, + *, + pin: Optional[Dict[str, Any]] = None, + locale: str = "en-US", + timezone: str = "", + extra_prefs: Optional[Dict[str, Any]] = None, + humanize: Union[bool, float] = True, + virtual_display: bool = False, +) -> Dict[str, Any]: + """Build a complete ``firefox_user_prefs`` dict for ``firefox.launch()``. + + Same prefs that ``InvisiblePlaywright(seed=..., locale=..., timezone=..., + extra_prefs=..., humanize=...)`` would inject. Use this when you need to + drive ``playwright.firefox.launch()`` yourself. + + Args: + seed: Integer seed for the Bayesian fingerprint sampler. Same seed + produces the same fingerprint. ``None`` generates a fresh + random int31 (matches ``InvisiblePlaywright`` default). + pin: Optional dict forcing specific fingerprint fields while the + rest stays seed-derived. See ``docs/pinning.md``. + locale: BCP-47 tag (e.g. ``"en-US"``). Drives ``Accept-Language`` + and ``navigator.language``. + timezone: IANA timezone (e.g. ``"America/New_York"``). Empty means + use the host TZ. This pure pref builder does NOT resolve + ``"auto"`` (that needs the proxy + a network lookup at launch + time) — pass a concrete zone here, or use ``InvisiblePlaywright`` + / ``resolve_session_timezone(timezone, proxy)`` for ``"auto"``. + extra_prefs: Optional dict overlaid LAST onto the generated prefs. + humanize: When True (default), every mouse move is expanded into + a Bezier trajectory by the patched Juggler. A float caps the + motion in seconds. False disables the behavior. + virtual_display: When True on Windows, apply GPU-disabling prefs + to prevent GPU process crashes on virtual desktops without + D3D11 backend. + + Returns: + Dict ready to pass as ``firefox_user_prefs=`` to + ``playwright.firefox.launch()`` or ``launch_persistent_context()``. + """ + resolved_seed = int(seed) if seed is not None else secrets.randbits(31) + profile = generate_profile(resolved_seed, pin=pin) + prefs = translate_profile_to_prefs( + profile, + locale=locale, + timezone=timezone, + extra_prefs=extra_prefs, + virtual_display=virtual_display, + ) + prefs["invisible_playwright.humanize"] = bool(humanize) + if humanize: + max_seconds = float(humanize) if not isinstance(humanize, bool) else 1.5 + prefs["invisible_playwright.humanize.maxTime"] = str(max_seconds) + return prefs + + +def get_default_args() -> List[str]: + """Return the default Firefox CLI args to pass via ``args=``. + + Currently empty list, since all our stealth configuration is delivered + via ``firefox_user_prefs`` rather than CLI flags. Exposed for parity + with the ``cloakbrowser.config.get_default_stealth_args`` pattern and + to future-proof integrations that already wire ``args=[*existing, + *get_default_args()]``. + """ + return [] diff --git a/src/invisible_playwright/constants.py b/src/invisible_playwright/constants.py index 43269eb..a4b998c 100644 --- a/src/invisible_playwright/constants.py +++ b/src/invisible_playwright/constants.py @@ -7,7 +7,14 @@ bugfixes don't force a multi-hour Firefox rebuild. from __future__ import annotations # Bump this when a new patched Firefox build is released on GitHub. -BINARY_VERSION: str = "firefox-4" +BINARY_VERSION: str = "firefox-9" + +# Releases known to be broken — ensure_binary() refuses them with a clear error +# instead of handing the user an unusable binary. firefox-8 was packaged without +# the juggler automation layer, so Playwright cannot drive it (TargetClosedError); +# fixed in firefox-9 (package-manifest.in now ships chrome/juggler). A cached +# firefox-8 from before the bump would otherwise keep being used silently. +BROKEN_VERSIONS: frozenset[str] = frozenset({"firefox-8"}) # Underlying Firefox version (for display only; does not drive downloads). FIREFOX_UPSTREAM_VERSION: str = "150.0.1" @@ -19,13 +26,15 @@ BINARY_BASENAME: str = f"firefox-{FIREFOX_UPSTREAM_VERSION}-stealth" def ARCHIVE_NAME(platform_key: str, machine: str) -> str: """Return the platform-specific archive filename. - platform_key: sys.platform ("win32", "linux") - machine: platform.machine() ("AMD64", "x86_64", ...) + platform_key: sys.platform ("win32", "linux", "darwin") + machine: platform.machine() ("AMD64", "x86_64", "arm64", "aarch64", ...) """ pk = platform_key.lower() m = machine.lower() if m in {"amd64", "x86_64"}: arch = "x86_64" + elif m in {"arm64", "aarch64"}: + arch = "arm64" else: raise NotImplementedError(f"unsupported arch: {machine}") @@ -33,16 +42,39 @@ def ARCHIVE_NAME(platform_key: str, machine: str) -> str: return f"{BINARY_BASENAME}-win-{arch}.zip" if pk == "linux": return f"{BINARY_BASENAME}-linux-{arch}.tar.gz" + if pk == "darwin": + return f"{BINARY_BASENAME}-macos-{arch}.tar.gz" raise NotImplementedError(f"unsupported platform: {platform_key}") # Binary entry point relative path inside the extracted archive root. +# macOS ships the .app bundle (renamed to a stable "Firefox.app" by release.yml); +# the wrapper execs the inner binary directly, which sidesteps Gatekeeper. BINARY_ENTRY_REL = { "win32": "firefox.exe", "linux": "firefox", + "darwin": "Firefox.app/Contents/MacOS/firefox", } # GitHub release URL template. The "TODO" owner is resolved at publication time. RELEASE_URL_TEMPLATE = ( "https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}" ) + +# ───────────────────────────────────────────────────────────────────────── +# GeoIP database (timezone="auto" → resolve IANA zone from proxy egress IP) +# ───────────────────────────────────────────────────────────────────────── +# daijro/geoip-all-in-one merges IP2Location LITE + GeoLite2 + DB-IP into a +# single mmdb (country ISO + coordinates + IANA timezone via tzfpy), rebuilt +# weekly. GPL-3.0, so we DOWNLOAD it at runtime into the user cache (like the +# Firefox binary) rather than bundling it into this MIT package. The `-all` +# variant covers IPv4+IPv6. download.py tracks the LATEST release and refreshes +# weekly; GEOIP_MMDB_VERSION is only the cold-cache fallback when the GitHub +# API is unreachable on a machine that has never downloaded the DB. +GEOIP_REPO: str = "daijro/geoip-all-in-one" +GEOIP_MMDB_VERSION: str = "2026.06.03" +GEOIP_ASSET: str = "geoip-aio-all.mmdb.zip" +GEOIP_MMDB_NAME: str = "geoip-aio-all.mmdb" +GEOIP_RELEASE_URL_TEMPLATE: str = ( + "https://github.com/daijro/geoip-all-in-one/releases/download/{tag}/{asset}" +) diff --git a/src/invisible_playwright/download.py b/src/invisible_playwright/download.py index 58a5e8f..acb5d49 100644 --- a/src/invisible_playwright/download.py +++ b/src/invisible_playwright/download.py @@ -5,9 +5,12 @@ import hashlib import os import platform import re +import shutil +import subprocess import sys import tarfile import tempfile +import time import zipfile from pathlib import Path @@ -18,6 +21,11 @@ from .constants import ( ARCHIVE_NAME, BINARY_ENTRY_REL, BINARY_VERSION, + BROKEN_VERSIONS, + GEOIP_ASSET, + GEOIP_MMDB_NAME, + GEOIP_MMDB_VERSION, + GEOIP_RELEASE_URL_TEMPLATE, RELEASE_URL_TEMPLATE, ) @@ -114,8 +122,39 @@ def _extract(archive: Path, dst: Path) -> None: raise RuntimeError(f"unknown archive format: {archive}") +def _post_extract_darwin(app_root: Path, entry: Path) -> None: + """Make an ad-hoc-signed .app launchable on macOS. + + The .app is downloaded via requests (no Finder quarantine attached), but we + strip com.apple.quarantine defensively and ensure the inner binary is + executable. We exec the inner binary directly (not via LaunchServices), so + Gatekeeper's first-launch prompt does not apply; the ad-hoc signature + (applied in release.yml) is what lets the arm64 Mach-O run at all. + """ + app = app_root + # walk up to the .app bundle dir if entry points inside it + for parent in entry.parents: + if parent.name.endswith(".app"): + app = parent + break + try: + subprocess.run(["xattr", "-dr", "com.apple.quarantine", str(app)], check=False) + except FileNotFoundError: + pass + try: + entry.chmod(0o755) + except OSError: + pass + + def ensure_binary(version: str = BINARY_VERSION) -> Path: """Return a path to a runnable Firefox executable. Download if needed.""" + if version in BROKEN_VERSIONS: + raise RuntimeError( + f"{version} is a known-broken release (the juggler automation layer is " + f"missing, so Playwright cannot drive it). Upgrade invisible_playwright " + f"(current BINARY_VERSION={BINARY_VERSION}) or pass a newer version." + ) plat = sys.platform mach = platform.machine() asset = ARCHIVE_NAME(plat, mach) @@ -148,6 +187,142 @@ def ensure_binary(version: str = BINARY_VERSION) -> Path: ) _extract(archive_path, version_dir) + if plat == "darwin": + _post_extract_darwin(version_dir, entry) + if not entry.exists(): raise RuntimeError(f"binary not found after extraction: {entry}") return entry + + +# ───────────────────────────────────────────────────────────────────────── +# GeoIP mmdb (timezone="auto" → map egress IP → IANA zone) +# +# daijro/geoip-all-in-one is rebuilt WEEKLY, so we don't pin a tag. We cache +# the latest mmdb and, once it's older than GEOIP_REFRESH_DAYS, re-check the +# latest release and pull a newer build if one exists. Net effect: no download +# (not even an API call) on a launch within the window; auto-refresh after it; +# a stale cache is reused when offline rather than breaking the launch. +# ───────────────────────────────────────────────────────────────────────── +GEOIP_REFRESH_DAYS = 7 # matches daijro's weekly rebuild cadence + + +def _geoip_root() -> Path: + return cache_root() / "geoip" + + +def _geoip_check_marker() -> Path: + return _geoip_root() / ".last_check" + + +def _cached_geoip_mmdb() -> Path | None: + """Newest cached mmdb across tag dirs, or None. Tag dirs are date strings + (e.g. ``2026.06.03``) so a lexical sort is chronological.""" + root = _geoip_root() + if not root.exists(): + return None + cands = sorted(root.glob("*/*.mmdb")) + return cands[-1] if cands else None + + +def _geoip_cache_fresh(max_age_days: int) -> bool: + marker = _geoip_check_marker() + if not marker.exists(): + return False + return (time.time() - marker.stat().st_mtime) < max_age_days * 86400 + + +def _touch_geoip_marker() -> None: + m = _geoip_check_marker() + m.parent.mkdir(parents=True, exist_ok=True) + m.touch() + + +def _latest_geoip_tag() -> str: + """Latest ``daijro/geoip-all-in-one`` release tag via the GitHub API.""" + headers = {"Accept": "application/vnd.github+json"} + token = _github_token() + if token: + headers["Authorization"] = f"token {token}" + r = requests.get( + f"https://api.github.com/repos/{GEOIP_REPO}/releases/latest", + headers=headers, timeout=15, + ) + r.raise_for_status() + tag = r.json().get("tag_name") + if not tag: + raise RuntimeError("no tag_name in geoip-all-in-one latest release") + return tag + + +def _download_geoip_tag(tag: str) -> Path: + """Download + extract a specific tag's mmdb if not already cached.""" + dst_dir = _geoip_root() / tag + target = dst_dir / GEOIP_MMDB_NAME + if not target.exists(): + url = GEOIP_RELEASE_URL_TEMPLATE.format(tag=tag, asset=GEOIP_ASSET) + dst_dir.mkdir(parents=True, exist_ok=True) + with tempfile.TemporaryDirectory() as td: + archive = Path(td) / GEOIP_ASSET + _download_file(url, archive) + _extract(archive, dst_dir) + if target.exists(): + return target + # asset name inside the zip may differ from GEOIP_MMDB_NAME + found = sorted(dst_dir.glob("*.mmdb")) + if found: + return found[0] + raise RuntimeError(f"geoip mmdb not found after extraction in {dst_dir}") + + +def _prune_old_geoip_tags(keep: str) -> None: + """Drop every cached tag dir except ``keep`` to bound disk usage.""" + root = _geoip_root() + if not root.exists(): + return + for d in root.iterdir(): + if d.is_dir() and d.name != keep: + shutil.rmtree(d, ignore_errors=True) + + +def geoip_mmdb_path() -> Path | None: + """Path to the currently-cached mmdb (newest tag), or None if none cached.""" + return _cached_geoip_mmdb() + + +def ensure_geoip_mmdb(max_age_days: int = GEOIP_REFRESH_DAYS) -> Path: + """Return a geoip mmdb, kept fresh against daijro's weekly rebuild. + + Resolution order: + 1. ``STEALTHFOX_GEOIP_MMDB`` env → use that file (user-supplied / test). + 2. A cached mmdb younger than ``max_age_days`` → use it (no network). + 3. Else ask GitHub for the latest tag, download it if not already cached, + prune older tags, and reset the freshness timer. + 4. If the API/download is unreachable but a cached mmdb exists → use it + (and reset the timer so we don't hammer the API while offline). + 5. Cold cache + no network → fall back to the pinned ``GEOIP_MMDB_VERSION``; + if that download also fails, raise. + """ + override = os.environ.get("STEALTHFOX_GEOIP_MMDB") + if override: + p = Path(override) + if not p.exists(): + raise RuntimeError(f"STEALTHFOX_GEOIP_MMDB points to a missing file: {p}") + return p + + cached = _cached_geoip_mmdb() + if cached and _geoip_cache_fresh(max_age_days): + return cached + + try: + tag = _latest_geoip_tag() + except Exception: + if cached: + _touch_geoip_marker() # recheck after the window; don't hammer + return cached + tag = GEOIP_MMDB_VERSION # cold cache + API down → pinned fallback + + mmdb = _download_geoip_tag(tag) + _prune_old_geoip_tags(mmdb.parent.name) + _touch_geoip_marker() + return mmdb diff --git a/src/invisible_playwright/launcher.py b/src/invisible_playwright/launcher.py index b79e4ff..15055ee 100644 --- a/src/invisible_playwright/launcher.py +++ b/src/invisible_playwright/launcher.py @@ -2,11 +2,13 @@ from __future__ import annotations import secrets +from pathlib import Path from typing import Any, Dict, Optional, Union -from playwright.sync_api import Browser, Playwright, sync_playwright +from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright from ._fpforge import Profile, generate_profile +from ._geo import resolve_session_timezone from ._headless import make_virtual_display from ._proxy import configure_proxy as _configure_proxy_shared from .download import ensure_binary @@ -111,6 +113,8 @@ class InvisiblePlaywright: timezone: str = "", extra_prefs: Optional[Dict[str, Any]] = None, binary_path: Optional[str] = None, + profile_dir: Optional[Union[str, Path]] = None, + prep_recaptcha: bool = False, ) -> None: """ Args: @@ -132,11 +136,26 @@ class InvisiblePlaywright: a float caps the motion in seconds. locale: BCP-47 tag (e.g. ``"en-US"``). Drives the ``Accept-Language`` header and ``navigator.language``. - timezone: IANA timezone (e.g. ``"America/New_York"``). Empty - means use the host TZ. + timezone: IANA zone (e.g. ``"America/New_York"``) — used as-is + when set, the only way to force a specific zone. ``""`` + (default) or ``"auto"`` ALWAYS resolves from the egress IP: + through the proxy when one is set, otherwise from the host's + own public IP (one lookup + an offline mmdb). On failure: with + a proxy it raises (a foreign proxy on the host TZ is the + ``timezone_mismatch`` signal); without a proxy it falls back to + the host TZ so a transient lookup failure can't break launch. extra_prefs: Optional dict of Firefox prefs overlayed on top of the generated profile — useful for niche tweaks without monkey-patching the package. + profile_dir: Path to a persistent Firefox profile directory. + When set, the session uses ``launch_persistent_context()`` + so cookies, localStorage, sessionStorage, extensions, cache + and prefs are kept on disk between runs. ``__enter__`` + returns a ``BrowserContext`` (not a ``Browser``) — use it + directly: ``with InvisiblePlaywright(profile_dir=p) as ctx: + page = ctx.new_page()``. First run creates the dir; + subsequent runs reuse it. Pair with a stable ``seed=`` to + also pin the fingerprint identity across runs. """ # Constrain to int31 — Firefox's `zoom.stealth.fpp.hw_seed` and # related stealth prefs are declared as ``int32_t`` in @@ -154,12 +173,22 @@ class InvisiblePlaywright: self._timezone = timezone self._extra_prefs = extra_prefs self._binary_path = binary_path + self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None + # reCAPTCHA cookie pre-seed — opt-in. Gated server-side: if a + # persistent profile_dir is in use, respect its existing cookies + # and DON'T enable pre-seed (the profile owns its own state). + self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None self._profile: Profile = generate_profile(self.seed, pin=self._pin) self._pw: Optional[Playwright] = None self._browser: Optional[Browser] = None + self._persistent_context: Optional[BrowserContext] = None self._virtual_display: Any = None - def __enter__(self) -> Browser: + def __enter__(self) -> Union[Browser, BrowserContext]: + # Resolve timezone="auto" (and the proxy-set-but-unset default) to a + # concrete IANA zone before anything reads self._timezone. Fail-early + # if a proxy is set but the egress zone can't be resolved. + self._timezone = resolve_session_timezone(self._timezone, self._proxy) executable = self._binary_path or ensure_binary() prefs = self._build_prefs() playwright_proxy = _configure_proxy_shared(self._proxy, prefs) @@ -168,6 +197,25 @@ class InvisiblePlaywright: try: self._pw = sync_playwright().start() + if self._profile_dir is not None: + # Persistent context — cookies / localStorage / extensions / + # prefs all live on disk between runs. Stealth prefs are + # re-injected via firefox_user_prefs on every launch (Playwright + # writes them to user.js, which overrides anything in + # prefs.js inside the persistent dir). + self._profile_dir.mkdir(parents=True, exist_ok=True) + self._persistent_context = self._pw.firefox.launch_persistent_context( + user_data_dir=str(self._profile_dir), + executable_path=str(executable), + headless=pw_headless, + firefox_user_prefs=prefs, + proxy=playwright_proxy, + args=self._extra_args, + env=env, + **self._persistent_context_kwargs(), + ) + _patch_sync_new_page_sleep(self._persistent_context) + return self._persistent_context self._browser = self._pw.firefox.launch( executable_path=str(executable), headless=pw_headless, @@ -185,6 +233,22 @@ class InvisiblePlaywright: self._patch_new_context_defaults(self._browser) return self._browser + def _persistent_context_kwargs(self) -> Dict[str, Any]: + """Context-level kwargs accepted by launch_persistent_context. + + Identical to ``_default_context_kwargs``: viewport / screen / DPR / + color-scheme / locale / timezone_id. Up to firefox-4 we had to drop + locale and timezone_id because Playwright's per-realm overrides + called IDL methods (``docShell.languageOverride``, + ``docShell.overrideTimezone``) that weren't exposed by our patched + build, causing launch_persistent_context to hang for 180s. From + firefox-5 (C7 chiusura), the C++ ``overrideTimezone`` method is + present and ``languageOverride`` was already there, so the + per-realm overrides land and the persistent context starts in + ~20s like the non-persistent path. + """ + return self._default_context_kwargs() + def _patch_new_context_defaults(self, browser: Browser) -> None: """Wrap ``browser.new_context`` so its defaults derive from the profile (viewport, screen, DPR, color-scheme). Users get a @@ -192,12 +256,18 @@ class InvisiblePlaywright: """ original = browser.new_context defaults = self._default_context_kwargs() + prep = self._prep_recaptcha + profile = self._profile # pass the whole Profile (seed + browsing_history) + tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region def patched(**kw): merged = dict(defaults) merged.update(kw) # user-supplied wins ctx = original(**merged) _patch_sync_new_page_sleep(ctx) + if prep: + from ._recaptcha_seed import seed_recaptcha_cookies_sync + seed_recaptcha_cookies_sync(ctx, profile, timezone=tz) return ctx browser.new_context = patched # type: ignore[assignment] @@ -226,6 +296,12 @@ class InvisiblePlaywright: self._teardown() def _teardown(self) -> None: + if self._persistent_context is not None: + try: + self._persistent_context.close() + except Exception: + pass + self._persistent_context = None if self._browser is not None: try: self._browser.close() diff --git a/src/invisible_playwright/prefs.py b/src/invisible_playwright/prefs.py index 43ece27..4f0a15d 100644 --- a/src/invisible_playwright/prefs.py +++ b/src/invisible_playwright/prefs.py @@ -289,13 +289,29 @@ _BASELINE: Dict[str, Any] = { "network.dns.echconfig.enabled": False, "network.dns.use_https_rr_as_altsvc": False, - # === A/B VARIANT B: Fission disabled === - # Force single content-process model (e10s only, no BC outer/inner split). - # Diagnostic for the FF150 BC-swap theory: if peet_ws/fppro/sannysoft - # work with this off, the Juggler FF146 baseline breaks specifically on - # cross-process navigation tracking. + # === Fission / site-isolation disabled (FF146 Playwright parity) === + # Force a single content-process model. Three knobs are required in FF150: + # upstream Playwright Firefox (FF146-based) only needed fission.autostart=False + # because FF146's default isolation strategy was looser. FF150 ships with + # fission.webContentIsolationStrategy=1 (IsolateEverything) which still + # site-isolates cross-origin iframes into separate `webIsolated` content + # processes EVEN WHEN fission.autostart is False. From the parent process's + # point of view, those iframes get a Juggler Frame placeholder with no + # docShell, no URL, and an execution context that wraps the wrong global, + # so frame.evaluate() fails with cross-origin SOP errors and + # element_handle.content_frame() returns None. + # + # Pinning the strategy to 0 keeps every cross-origin web iframe in the + # parent's content process, where the Juggler code paths from the FF146 + # era expect them. processCount.webIsolated=1 is kept as belt-and-suspenders + # in case some path still classifies an origin as webIsolated despite the + # strategy change. It costs nothing to leave. + # + # See issue #20 + tests/test_cross_origin_iframe.py for the regression + # sentinel that catches a future A/B flipping these back. "fission.autostart": False, "fission.autostart.session": False, + "fission.webContentIsolationStrategy": 0, # IsolateNothing "dom.ipc.processCount.webIsolated": 1, @@ -384,6 +400,21 @@ _WIN_VIRT_DESKTOP_WORKAROUNDS: Dict[str, Any] = { # Bugzilla refs: 1798091, 1524591, 1229829. Lowering the GPU sandbox to 0 # restores hardware compositor + functional WebGL on alt desktops. "security.sandbox.gpu.level": 0, + # Same root cause as above, content process side. Wrapper repo issue #18 + # (tab crash on cross-process navigation under headless=True). Sandbox + # content level > 4 puts content processes on the sandbox's own + # kAlternateWinstation (see security/sandbox/win/src/sandboxbroker/ + # sandboxBroker.cpp line 1113-1114: + # `if (aSandboxLevel > 4) config->SetDesktop(kAlternateWinstation)`). + # Combined with our CreateDesktop alt-desktop, that puts browser process + # and content processes on DIFFERENT desktops. Cross-process navigation + # then fails window parenting between parent and child, the content + # process exits cleanly (exitCode=0, signal=null) and Playwright fires + # page.on('crash') ~10s after page load. Lowering content sandbox to 4 + # keeps content processes on the same desktop as the browser process, + # which is what we want here (still tight enough — level 4 blocks + # file/registry write, network calls, hardware access). + "security.sandbox.content.level": 4, } diff --git a/tests/conftest.py b/tests/conftest.py index 429aa6d..900732b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,12 @@ +import os import random +import sys +from pathlib import Path import pytest from invisible_playwright._fpforge import generate_profile +from invisible_playwright.constants import BINARY_ENTRY_REL @pytest.fixture @@ -15,3 +19,36 @@ def deterministic_rng(): def sample_profile(): """A Profile generated from seed=42 for reuse across tests.""" return generate_profile(seed=42) + + +@pytest.fixture(scope="session") +def firefox_binary(): + """Locate the patched Firefox binary for E2E tests, or skip cleanly. + + Single source of truth for every E2E test (previously each test file had its + own copy — and three of them silently ignored INVPW_BINARY_PATH, so they kept + testing whatever was in the cache even when you pointed the suite at a + specific build: a false-confidence trap). Lookup order: + + 1. ``INVPW_BINARY_PATH`` env var — point the whole suite at a local build + or a freshly-extracted release (this is how the full-suite gate runs). + 2. Cached binary under ``cache_dir_for_version()`` (post ``fetch``). + 3. Skip — we never trigger an implicit multi-hundred-MB network download + inside a test run. + """ + env_path = os.environ.get("INVPW_BINARY_PATH") + if env_path: + if Path(env_path).exists(): + return env_path + pytest.skip(f"INVPW_BINARY_PATH={env_path!r} does not exist") + + if sys.platform not in BINARY_ENTRY_REL: + pytest.skip(f"unsupported platform: {sys.platform}") + from invisible_playwright.download import cache_dir_for_version + entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform] + if not entry.exists(): + pytest.skip( + "patched Firefox binary not cached and INVPW_BINARY_PATH unset; " + "set INVPW_BINARY_PATH= or run `invisible-playwright fetch`" + ) + return str(entry) diff --git a/tests/test_constants.py b/tests/test_constants.py index 8d124a7..911ad70 100644 --- a/tests/test_constants.py +++ b/tests/test_constants.py @@ -5,11 +5,26 @@ from invisible_playwright.constants import ( BINARY_BASENAME, BINARY_ENTRY_REL, BINARY_VERSION, + BROKEN_VERSIONS, FIREFOX_UPSTREAM_VERSION, RELEASE_URL_TEMPLATE, ) +@pytest.mark.unit +def test_broken_versions_excludes_current(): + """The current BINARY_VERSION must NEVER be in BROKEN_VERSIONS — otherwise + every default ensure_binary() call would raise and the wrapper is unusable.""" + assert BINARY_VERSION not in BROKEN_VERSIONS + + +@pytest.mark.unit +def test_firefox_8_is_marked_broken(): + """firefox-8 shipped without the juggler layer (undrivable by Playwright); + it must stay flagged so a stale cache can't silently hand it to a user.""" + assert "firefox-8" in BROKEN_VERSIONS + + @pytest.mark.unit def test_binary_version_format(): assert BINARY_VERSION.startswith("firefox-") @@ -31,9 +46,16 @@ def test_archive_name_linux(): @pytest.mark.unit -def test_archive_name_unsupported_raises(): +def test_archive_name_macos_arm64(): + name = ARCHIVE_NAME("darwin", "arm64") + assert name.endswith(".tar.gz") + assert "macos-arm64" in name + + +@pytest.mark.unit +def test_archive_name_truly_unsupported_raises(): with pytest.raises(NotImplementedError): - ARCHIVE_NAME("darwin", "arm64") + ARCHIVE_NAME("plan9", "x86_64") @pytest.mark.unit @@ -77,20 +99,18 @@ def test_archive_name_rejects_unsupported_arches(machine): @pytest.mark.unit @pytest.mark.parametrize("machine", ["arm64", "aarch64"]) -def test_archive_name_arm64_not_yet_supported(machine): - """ARM64 is a frequent request (issue #6). Until binaries exist for it, - ARCHIVE_NAME should hard-fail rather than silently degrade. If this test - starts failing because someone shipped ARM64 builds, replace it with the - positive case.""" - with pytest.raises(NotImplementedError): - ARCHIVE_NAME("linux", machine) +def test_archive_name_arm64_supported(machine): + """ARM64 is shipped now (issue #6): both Linux aarch64 and macOS arm64. + ARCHIVE_NAME must map both machine spellings to the canonical -arm64 asset.""" + assert ARCHIVE_NAME("linux", machine) == "firefox-150.0.1-stealth-linux-arm64.tar.gz" + assert ARCHIVE_NAME("darwin", machine) == "firefox-150.0.1-stealth-macos-arm64.tar.gz" @pytest.mark.unit -@pytest.mark.parametrize("platform_key", ["darwin", "freebsd", "cygwin", "openbsd"]) +@pytest.mark.parametrize("platform_key", ["freebsd", "cygwin", "openbsd"]) def test_archive_name_rejects_unsupported_platforms(platform_key): - """Same logic — non-Linux/non-Windows platforms must raise, not silently - pick one of the two.""" + """win32/linux/darwin are supported; other platforms must raise, not + silently pick one of the three.""" with pytest.raises(NotImplementedError, match=platform_key): ARCHIVE_NAME(platform_key, "x86_64") @@ -104,7 +124,7 @@ def test_archive_name_rejects_unsupported_platforms(platform_key): def test_binary_entry_rel_covers_every_supported_platform(): """If ARCHIVE_NAME accepts a platform key, BINARY_ENTRY_REL must declare where the executable lives inside the archive for it.""" - for plat in ["win32", "linux"]: + for plat in ["win32", "linux", "darwin"]: ARCHIVE_NAME(plat, "x86_64") # must not raise assert plat in BINARY_ENTRY_REL, ( f"ARCHIVE_NAME accepts {plat!r} but BINARY_ENTRY_REL has no entry " @@ -118,6 +138,7 @@ def test_binary_entry_rel_extension_matches_platform(): assert BINARY_ENTRY_REL["win32"].endswith(".exe") assert not BINARY_ENTRY_REL["linux"].endswith(".exe") assert BINARY_ENTRY_REL["linux"] == "firefox" + assert BINARY_ENTRY_REL["darwin"].endswith(".app/Contents/MacOS/firefox") # ---- RELEASE_URL_TEMPLATE shape ------------------------------------------- # diff --git a/tests/test_cross_origin_iframe.py b/tests/test_cross_origin_iframe.py new file mode 100644 index 0000000..26df483 --- /dev/null +++ b/tests/test_cross_origin_iframe.py @@ -0,0 +1,278 @@ +"""Regression tests for cross-origin / cross-process iframe interaction. + +History: wrapper repo issue #20 reported that a third-party cookie +consent iframe was completely unreachable from Playwright in 0.1.7 — +``element_handle.content_frame()`` returned ``None``, ``frame.evaluate()`` +threw cross-origin SOP errors, and ``frame_locator().click()`` timed +out. + +Root cause was a missing pref. FF150 ships with +``fission.webContentIsolationStrategy=1`` (IsolateEverything), which +site-isolates cross-origin iframes into separate webIsolated content +processes even when ``fission.autostart=False``. The Juggler code paths +inherited from the FF146 era assume same-process iframes. The wrapper's +``_BASELINE`` now pins the pref to 0 (IsolateNothing). + +These tests exist so a future Firefox upgrade or a fingerprint A/B +that flips this pref by accident cannot ship without a red CI signal. + +Layers: + * ``unit`` — ``_BASELINE`` contains the pref with the right value. No browser. + * ``e2e`` — launch the real binary against a LOCAL HTTP harness on + ``127.0.0.1`` (two ports = two SOP origins) and verify the + four protocol operations that regressed: frame URL tracking, + ``handle.content_frame()``, ``frame.evaluate()``, and + ``frame_locator(...).locator(...)`` element resolution. + +The e2e tests run entirely offline. They never call out to a real site; +the cross-origin shape is reproduced with two local HTTP servers on +random free ports. +""" +from __future__ import annotations + +import socket +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + +import pytest + +from invisible_playwright._fpforge import generate_profile +from invisible_playwright.prefs import _BASELINE, translate_profile_to_prefs + + +# ──────────────────────────────────────────────────────────────────── +# Unit layer — fast, no browser, runs on every CI +# ──────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_baseline_pins_web_content_isolation_strategy_to_zero(): + """Regression sentinel. + + ``fission.webContentIsolationStrategy`` MUST be 0 (IsolateNothing). + The FF150 default is 1 (IsolateEverything), which site-isolates + cross-origin iframes into separate webIsolated content processes + and breaks Playwright frame tracking from the parent process. + """ + assert _BASELINE["fission.webContentIsolationStrategy"] == 0, ( + "fission.webContentIsolationStrategy must be 0 (IsolateNothing). " + "If you bumped it for an A/B, cross-origin iframes will appear " + "in page.frames with empty URLs and content_frame() will return " + "None — see the changelog entry that introduced this test." + ) + + +@pytest.mark.unit +def test_baseline_keeps_fission_autostart_off(): + """Belt for the suspenders above. All three prefs are required.""" + assert _BASELINE["fission.autostart"] is False + assert _BASELINE["fission.autostart.session"] is False + assert _BASELINE["dom.ipc.processCount.webIsolated"] == 1 + + +@pytest.mark.unit +def test_translated_profile_propagates_isolation_strategy(): + """The fix must survive translate_profile_to_prefs, not just live in _BASELINE.""" + p = generate_profile(seed=42) + prefs = translate_profile_to_prefs(p) + assert prefs["fission.webContentIsolationStrategy"] == 0 + + +@pytest.mark.unit +def test_extra_prefs_override_can_break_isolation_only_explicitly(): + """If a caller wants to A/B isolation, they have to set it explicitly. + The wrapper does not silently flip it back on. + """ + p = generate_profile(seed=42) + prefs_default = translate_profile_to_prefs(p) + assert prefs_default["fission.webContentIsolationStrategy"] == 0 + + prefs_ab = translate_profile_to_prefs( + p, extra_prefs={"fission.webContentIsolationStrategy": 1} + ) + assert prefs_ab["fission.webContentIsolationStrategy"] == 1 + + +# ──────────────────────────────────────────────────────────────────── +# E2E layer — needs cached binary + bind to localhost ports +# ──────────────────────────────────────────────────────────────────── + + +def _free_port() -> int: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.bind(("127.0.0.1", 0)) + port = s.getsockname()[1] + s.close() + return port + + +class _SilentHandler(BaseHTTPRequestHandler): + """Suppress per-request access logging so pytest output stays clean.""" + PAYLOAD = b"" # set per-instance via subclassing + + def log_message(self, *_a): + pass + + def do_GET(self): + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(self.PAYLOAD) + + +def _serve(payload: bytes, port: int) -> HTTPServer: + """Start an HTTP server on 127.0.0.1:port serving ``payload`` on every GET.""" + handler_cls = type( + "_H", (_SilentHandler,), {"PAYLOAD": payload} + ) + srv = HTTPServer(("127.0.0.1", port), handler_cls) + t = threading.Thread(target=srv.serve_forever, daemon=True) + t.start() + return srv + + +@pytest.fixture +def cross_origin_harness(): + """Spin up TWO local HTTP servers on different localhost ports. + + Two ports = two distinct origins under SOP (same host, different port + → different origin). The parent page on port A embeds an iframe with + src pointing at port B. Same cross-origin browsing-context shape as + a parent-page-plus-third-party-iframe layout, fully offline. + """ + pa, pb = _free_port(), _free_port() + parent_html = f"""parent +

parent

+ + + +""".encode("utf-8") + child_html = b""" + + + +""" + sa = _serve(parent_html, pa) + sb = _serve(child_html, pb) + try: + yield {"parent_url": f"http://127.0.0.1:{pa}/", "child_origin": f"http://127.0.0.1:{pb}"} + finally: + sa.shutdown() + sb.shutdown() + + +@pytest.mark.e2e +def test_cross_origin_iframe_url_appears_in_page_frames(firefox_binary, cross_origin_harness): + """``page.frames`` must list the cross-origin iframe with its real URL. + + Before the pref fix, the URL came back as '' because the navigation + observer for the iframe fired in a different content process than + the parent's FrameTree was registered in. + """ + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) + page.wait_for_selector("iframe#ifr_plain", timeout=10_000) + page.wait_for_timeout(500) + + urls = [f.url for f in page.frames] + assert any(cross_origin_harness["child_origin"] in (u or "") for u in urls), ( + f"no frame had the child origin in its URL; page.frames urls = {urls!r}" + ) + + +@pytest.mark.e2e +def test_cross_origin_iframe_content_frame_resolves(firefox_binary, cross_origin_harness): + """``handle.content_frame()`` must return a Frame (not None) for every + cross-origin iframe shape we care about: plain, sandboxed, titled. + """ + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) + page.wait_for_selector("iframe#ifr_plain", timeout=10_000) + page.wait_for_timeout(500) + + for sel in ("iframe#ifr_plain", "iframe#ifr_sandbox", "iframe#ifr_titled"): + handle = page.query_selector(sel) + assert handle is not None, f"{sel!r} not found in DOM" + cf = handle.content_frame() + assert cf is not None, f"{sel!r}: content_frame() returned None" + assert cross_origin_harness["child_origin"] in (cf.url or ""), ( + f"{sel!r}: content_frame().url = {cf.url!r}, " + f"expected child origin {cross_origin_harness['child_origin']!r}" + ) + + +@pytest.mark.e2e +def test_cross_origin_iframe_evaluate_returns_real_values(firefox_binary, cross_origin_harness): + """``frame.evaluate()`` inside the cross-origin iframe must work. + + Pre-fix: every evaluate failed with a cross-origin SOP error because + the iframe ended up with a stale/wrong execution context. + """ + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) + page.wait_for_selector("iframe#ifr_plain", timeout=10_000) + page.wait_for_timeout(500) + + cf = page.query_selector("iframe#ifr_plain").content_frame() + assert cf is not None + href = cf.evaluate("() => location.href") + assert cross_origin_harness["child_origin"] in href + title = cf.evaluate("() => document.title") + assert isinstance(title, str) + n_buttons = cf.evaluate("() => document.querySelectorAll('button').length") + assert n_buttons == 2 + + +@pytest.mark.e2e +def test_cross_origin_iframe_frame_locator_resolves_button(firefox_binary, cross_origin_harness): + """``frame_locator(...).locator(...)`` must reach the button inside the iframe.""" + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) + page.wait_for_selector("iframe#ifr_plain", timeout=10_000) + + for selector in ("button#ok", "button.btn-primary"): + cnt = page.frame_locator("iframe#ifr_plain").locator(selector).count() + assert cnt == 1, f"locator({selector!r}) found {cnt} elements (expected 1)" + + +@pytest.mark.e2e +def test_cross_origin_iframe_dispatch_event_click_works(firefox_binary, cross_origin_harness): + """End-to-end interaction via ``dispatch_event`` must succeed. + + Plain ``.click()`` can trip Playwright's actionability heuristic on + some third-party UIs (same on vanilla Playwright Firefox — not our + regression), but ``dispatch_event('click')`` always works once the + iframe is reachable. + """ + from invisible_playwright import InvisiblePlaywright + + with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: + ctx = browser.new_context() + page = ctx.new_page() + page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) + page.wait_for_selector("iframe#ifr_plain", timeout=10_000) + + page.frame_locator("iframe#ifr_plain").locator("button#ok").dispatch_event( + "click", timeout=4_000 + ) + cf = page.query_selector("iframe#ifr_plain").content_frame() + assert cf.evaluate("() => document.title") == "clicked" diff --git a/tests/test_detectors_e2e.py b/tests/test_detectors_e2e.py new file mode 100644 index 0000000..821a186 --- /dev/null +++ b/tests/test_detectors_e2e.py @@ -0,0 +1,171 @@ +"""E2E: run the REAL open-source detectors against the patched binary, on CI. + +Instead of our own hand-rolled signal checks, this loads the actual detection +libraries and uses their FULL API surface: + + * BotD (@fingerprintjs/botd, MIT) — the client-side bot detector that + FingerprintJS Pro itself uses. We assert the aggregate verdict + (``detect().bot == False``) AND every one of its ~18 individual detectors + (``getDetections()``) returns ``bot == False``. The per-detector view is + why we could delete our hand-rolled ``test_botd_*`` mirrors — the real + library now covers each detector, with the same granularity. + * FingerprintJS open-source (MIT) — ``get()`` must return a ``visitorId`` + that is STABLE across two fresh launches with the same seed (an + over-randomized spoof drifts), and a RICH component set (the fingerprint + surface is real, not a stub). + +Everything is hermetic: the libraries are vendored (tests/vendor/) and served +from a localhost HTTP server — no external CDN call (Firefox tracking-protection +blocks the CDN anyway) and no IP/network dependency. Runs identically on a dev +box and on a GitHub runner. + +NOT covered: FingerprintJS *Pro* (commercial, server-side, IP/residential +analysis) — can't be self-hosted, stays the local realness gate. +""" +from __future__ import annotations + +import http.server +import socketserver +import threading +from pathlib import Path + +import pytest + +from invisible_playwright import InvisiblePlaywright + +_VENDOR = Path(__file__).parent / "vendor" +_BOTD = "botd-2.0.0.esm.js" +_FPJS = "fingerprintjs-5.2.0.umd.min.js" + +_PAGE = f""" +detectors + +

loading

+""" + + +class _DetectorSite: + """Localhost server: `/` → the page; `/` → the vendored bundle.""" + + def __init__(self): + page = _PAGE.encode() + vendor = _VENDOR + + class H(http.server.BaseHTTPRequestHandler): + def do_GET(self): # noqa: N802 + if self.path == "/" or self.path.startswith("/?"): + body, ctype = page, "text/html; charset=utf-8" + else: + f = vendor / Path(self.path.lstrip("/")).name + if not f.is_file(): + self.send_error(404); return + body = f.read_bytes() + ctype = "text/javascript; charset=utf-8" + self.send_response(200) + self.send_header("Content-Type", ctype) + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, *a): + pass + + self._srv = socketserver.TCPServer(("127.0.0.1", 0), H) + self.port = self._srv.server_address[1] + threading.Thread(target=self._srv.serve_forever, daemon=True).start() + + @property + def url(self): + return f"http://127.0.0.1:{self.port}/" + + def close(self): + self._srv.shutdown() + + +@pytest.fixture(scope="module") +def detector_site(): + s = _DetectorSite() + yield s + s.close() + + +def _run_detectors(firefox_binary, url): + """Launch the binary, load the page, return (botd, fp, err).""" + with InvisiblePlaywright(seed=42, binary_path=firefox_binary) as browser: + page = browser.new_page() + page.goto(url, wait_until="load", timeout=45000) + page.wait_for_function( + "() => document.getElementById('state').textContent === 'done'", + timeout=45000, + ) + botd = page.evaluate("() => window.__botd") + fp = page.evaluate("() => window.__fp") + err = page.evaluate("() => window.__err") + return botd, fp, err + + +@pytest.mark.e2e +def test_botd_no_detector_flags_automation(firefox_binary, detector_site): + """The real BotD must not flag the build — aggregate AND every one of its + individual detectors (webDriver/userAgent/appVersion/plugins/process/... ). + """ + botd, _fp, err = _run_detectors(firefox_binary, detector_site.url) + assert botd is not None, f"BotD produced no result (err:{err!r})" + assert botd.get("bot") is False, ( + f"BotD aggregate flagged a bot: botKind={botd.get('botKind')!r}" + ) + detections = botd.get("detections") or {} + assert detections, f"BotD getDetections() returned nothing (err:{err!r})" + flagged = {k: v.get("botKind") for k, v in detections.items() if v.get("bot")} + assert not flagged, f"BotD individual detectors flagged automation: {flagged}" + + +@pytest.mark.e2e +def test_fingerprintjs_visitorid_stable_across_launches(firefox_binary, detector_site): + """FingerprintJS visitorId must be present and identical across two fresh + launches with the same seed — a real browser is stable; an over-randomized + spoof drifts (and a drifting fingerprint is itself a bot tell).""" + _b1, fp1, err1 = _run_detectors(firefox_binary, detector_site.url) + _b2, fp2, err2 = _run_detectors(firefox_binary, detector_site.url) + assert fp1 and fp1.get("visitorId"), f"no visitorId on run 1 (err:{err1!r})" + assert fp2 and fp2.get("visitorId"), f"no visitorId on run 2 (err:{err2!r})" + assert fp1["visitorId"] == fp2["visitorId"], ( + f"FingerprintJS visitorId drifted across launches: " + f"{fp1['visitorId']!r} != {fp2['visitorId']!r} (per-session entropy = bot tell)" + ) + + +@pytest.mark.e2e +def test_fingerprintjs_collects_rich_fingerprint(firefox_binary, detector_site): + """FingerprintJS must collect a RICH component surface (a real browser + exposes many signals; a stripped/blocked surface is itself suspicious). + We don't assert zero errored components (some are legitimately unsupported + per browser), only that the surface is substantial and the id computed.""" + _b, fp, err = _run_detectors(firefox_binary, detector_site.url) + assert fp and fp.get("visitorId"), f"FingerprintJS produced no id (err:{err!r})" + keys = fp.get("componentKeys") or [] + assert len(keys) >= 15, ( + f"FingerprintJS collected only {len(keys)} components — surface too thin " + f"(suppressed signals are themselves a tell): {keys}" + ) diff --git a/tests/test_download.py b/tests/test_download.py index b32dced..e4159ca 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -418,7 +418,7 @@ def test_github_token_none_when_unset(monkeypatch): # Bonus coverage: unsupported platform raises NotImplementedError before any HTTP @pytest.mark.unit def test_ensure_binary_unsupported_platform_raises(monkeypatch): - monkeypatch.setattr("sys.platform", "darwin") + monkeypatch.setattr("sys.platform", "freebsd") # win32/linux/darwin are supported import platform monkeypatch.setattr(platform, "machine", lambda: "AMD64") with pytest.raises(NotImplementedError, match="unsupported platform"): @@ -832,3 +832,11 @@ def test_parse_owner_repo_handles_repos_with_dashes_and_underscores(): ) assert owner == "my-org" assert repo == "my_cool.repo" + + +@pytest.mark.unit +def test_ensure_binary_refuses_known_broken_version(): + """A known-broken release (firefox-8, no juggler) must be refused with a + clear error BEFORE any download — never silently handed to the user.""" + with pytest.raises(RuntimeError, match="known-broken"): + ensure_binary("firefox-8") diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 35fad98..d2e59f2 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -8,33 +8,9 @@ handling) do not need a binary and always run. """ from __future__ import annotations -import sys - import pytest from invisible_playwright import InvisiblePlaywright -from invisible_playwright.constants import BINARY_ENTRY_REL - - -@pytest.fixture(scope="session") -def firefox_binary(): - """Locate the patched Firefox binary or skip the calling test. - - We do NOT trigger a network download here: ``ensure_binary`` would - pull a multi-hundred-megabyte archive from a private release, - which is not appropriate inside a unit/E2E test run. Instead we - look for an already-cached binary; if missing we skip. - """ - if sys.platform not in BINARY_ENTRY_REL: - pytest.skip(f"unsupported platform: {sys.platform}") - from invisible_playwright.download import cache_dir_for_version - entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform] - if not entry.exists(): - pytest.skip( - "patched Firefox binary not cached; run `invisible-playwright fetch` " - "to enable E2E tests" - ) - return str(entry) # ──────────────────────────────────────────────────────────────────── diff --git a/tests/test_fingerprint_consistency.py b/tests/test_fingerprint_consistency.py new file mode 100644 index 0000000..9912299 --- /dev/null +++ b/tests/test_fingerprint_consistency.py @@ -0,0 +1,510 @@ +"""Fingerprint consistency / lie-detection tests. + +Complementary to test_fingerprint_surface.py: those tests ask "do you +look like a real browser?" — these ask "are your fingerprint surfaces +INTERNALLY CONSISTENT?" + +Anti-bot systems catch spoofers not by checking each signal in +isolation but by cross-checking related signals. If you spoof UA to +"Windows" but leave navigator.platform as "Linux x86_64", or you spoof +WebGL renderer in the main thread but not in a Web Worker, the +inconsistency proves the spoof is fake. + +Sources studied (all FOSS, MIT-licensed): + - creepjs/src/lies/index.ts — the canonical lie detector + - creepjs/src/worker/index.ts — main-vs-worker scope cross-check + - creepjs/src/math/index.ts — Math.x(p) deterministic equality + - creepjs/src/navigator/index.ts — UA/platform/oscpu invariants + - niespodd/browser-fingerprinting README — worker hwConcurrency, + plugin chain, perf.timeOrigin + +Everything runs against `about:blank` with NO network and NO proxy. + +Run only this file: + pytest tests/test_fingerprint_consistency.py -m e2e -v +""" +from __future__ import annotations + +import pytest + +from invisible_playwright import InvisiblePlaywright + + +PIN = { + "screen.width": 1920, + "screen.height": 1080, + "screen.avail_width": 1920, + "screen.avail_height": 1040, + "screen.dpr": 1.0, + "hardware.concurrency": 8, + "audio.sample_rate": 48000, + "audio.max_channel_count": 2, +} + + +@pytest.fixture(scope="module") +def page(firefox_binary): + with InvisiblePlaywright( + seed=42, + pin=PIN, + binary_path=firefox_binary, + headless=True, + ) as browser: + ctx = browser.new_context() + p = ctx.new_page() + p.goto("about:blank", timeout=30_000) + yield p + + +def _ev(page, expr): + return page.evaluate(expr) + + +# =========================================================================== +# 1. Math determinism — same input MUST yield same output +# Source: creepjs/src/math/index.ts +# A wrapper that adds noise to Math.* (canvas-spoofing prefs) exposes +# itself here: two consecutive calls with the same input must be +# byte-identical. +# =========================================================================== + + +@pytest.mark.e2e +@pytest.mark.parametrize("fn,arg", [ + ("cos", "1e308"), + ("acos", "0.5"), + ("asin", "0.5"), + ("atan", "Math.PI"), + ("atanh", "0.5"), + ("cbrt", "Math.PI"), + ("cosh", "Math.PI"), + ("exp", "Math.PI"), + ("expm1", "Math.PI"), + ("log", "Math.PI"), + ("log1p", "Math.PI"), + ("log10", "Math.PI"), + ("sin", "Math.PI"), + ("sinh", "Math.PI"), + ("sqrt", "Math.PI"), + ("tan", "Math.PI"), + ("tanh", "Math.PI"), +]) +def test_math_determinism(page, fn, arg): + """Math.() must return the same value across 100 calls.""" + first, last, all_equal = _ev(page, f"""() => {{ + const r = []; + for (let i = 0; i < 100; i++) r.push(Math.{fn}({arg})); + return [r[0], r[99], r.every(x => Object.is(x, r[0]))]; + }}""") + assert all_equal, ( + f"Math.{fn}({arg}) drifts across calls: first={first}, last={last}" + ) + + +@pytest.mark.e2e +def test_math_pow_two_arg_determinism(page): + ok = _ev(page, """() => { + const a = Math.pow(Math.PI, 2); + for (let i = 0; i < 50; i++) { + if (!Object.is(Math.pow(Math.PI, 2), a)) return false; + } + return true; + }""") + assert ok + + +# =========================================================================== +# 2. Worker scope vs main thread — navigator properties MUST agree +# Source: creepjs/src/worker/index.ts +# =========================================================================== + + +def _worker_navigator_dict(page, props): + expr = """async (props) => { + const code = ` + self.onmessage = (e) => { + const out = {}; + for (const p of e.data) { + try { out[p] = self.navigator[p]; } + catch (err) { out[p] = ''; } + } + if (out.languages && Array.isArray(out.languages)) { + out.languages = [...out.languages]; + } + self.postMessage(out); + }; + `; + const blob = new Blob([code], { type: 'application/javascript' }); + const url = URL.createObjectURL(blob); + const worker = new Worker(url); + try { + const result = await new Promise((resolve, reject) => { + worker.onmessage = (e) => resolve(e.data); + worker.onerror = (e) => reject(new Error(e.message)); + worker.postMessage(props); + setTimeout(() => reject(new Error('worker timeout')), 5000); + }); + return result; + } finally { + worker.terminate(); + URL.revokeObjectURL(url); + } + }""" + return page.evaluate(expr, list(props)) + + +@pytest.mark.e2e +def test_worker_userAgent_matches_main(page): + main = _ev(page, "navigator.userAgent") + worker = _worker_navigator_dict(page, ("userAgent",)) + assert worker["userAgent"] == main, ( + f"UA drift main vs worker:\n main: {main!r}\n worker: {worker['userAgent']!r}" + ) + + +@pytest.mark.e2e +def test_worker_hardwareConcurrency_matches_main(page): + main = _ev(page, "navigator.hardwareConcurrency") + worker = _worker_navigator_dict(page, ("hardwareConcurrency",)) + assert worker["hardwareConcurrency"] == main + + +@pytest.mark.e2e +def test_worker_language_matches_main(page): + main = _ev(page, "navigator.language") + worker = _worker_navigator_dict(page, ("language",)) + assert worker["language"] == main + + +@pytest.mark.e2e +def test_worker_languages_matches_main(page): + main = _ev(page, "[...navigator.languages]") + worker = _worker_navigator_dict(page, ("languages",)) + assert list(worker["languages"]) == list(main) + + +@pytest.mark.e2e +def test_worker_platform_matches_main(page): + main = _ev(page, "navigator.platform") + worker = _worker_navigator_dict(page, ("platform",)) + assert worker["platform"] == main + + +# =========================================================================== +# 3. Iframe scope vs window scope +# Source: creepjs/src/lies/index.ts (getBehemothIframe pattern) +# =========================================================================== + + +def _iframe_navigator_dict(page, props): + expr = """(props) => { + const iframe = document.createElement('iframe'); + iframe.style.display = 'none'; + document.body.appendChild(iframe); + const out = {}; + for (const p of props) { + try { out[p] = iframe.contentWindow.navigator[p]; } + catch (e) { out[p] = ''; } + } + if (Array.isArray(out.languages)) out.languages = [...out.languages]; + document.body.removeChild(iframe); + return out; + }""" + return page.evaluate(expr, list(props)) + + +@pytest.mark.e2e +def test_iframe_userAgent_matches_window(page): + main = _ev(page, "navigator.userAgent") + iframe = _iframe_navigator_dict(page, ("userAgent",)) + assert iframe["userAgent"] == main + + +@pytest.mark.e2e +def test_iframe_language_matches_window(page): + main = _ev(page, "navigator.language") + iframe = _iframe_navigator_dict(page, ("language",)) + assert iframe["language"] == main + + +@pytest.mark.e2e +def test_iframe_hardwareConcurrency_matches_window(page): + main = _ev(page, "navigator.hardwareConcurrency") + iframe = _iframe_navigator_dict(page, ("hardwareConcurrency",)) + assert iframe["hardwareConcurrency"] == main + + +@pytest.mark.e2e +def test_iframe_screen_matches_window(page): + main = _ev(page, "[screen.width, screen.height]") + iframe = _ev(page, """() => { + const f = document.createElement('iframe'); + f.style.display = 'none'; + document.body.appendChild(f); + const v = [f.contentWindow.screen.width, f.contentWindow.screen.height]; + document.body.removeChild(f); + return v; + }""") + assert iframe == main + + +# =========================================================================== +# 4. UA self-consistency (creepjs/src/navigator/index.ts) +# =========================================================================== + + +@pytest.mark.e2e +def test_navigator_platform_matches_userAgent_OS(page): + ua = _ev(page, "navigator.userAgent") + platform = _ev(page, "navigator.platform") + if "Windows" in ua: + assert "Win" in platform + elif "Mac" in ua: + assert "Mac" in platform + elif "Linux" in ua or "X11" in ua: + assert "Linux" in platform or "X11" in platform + + +@pytest.mark.e2e +def test_navigator_oscpu_matches_userAgent(page): + """Firefox-only: navigator.oscpu must correlate with UA OS.""" + ua = _ev(page, "navigator.userAgent") + oscpu = _ev(page, "navigator.oscpu || ''") + if not oscpu: + pytest.skip("navigator.oscpu not exposed") + if "Windows" in ua: + assert "Windows" in oscpu + elif "Linux" in ua: + assert "Linux" in oscpu + elif "Mac" in ua: + assert "Mac" in oscpu + + +# =========================================================================== +# 5. Native function self-toString (creepjs/src/lies/index.ts hasKnownToString) +# =========================================================================== + + +def _is_native_toString(text, fn_name): + """Mirror of CreepJS hasKnownToString — accept the engine-specific + native patterns (single-line on V8, multi-line on SpiderMonkey).""" + import re as _re + name = _re.escape(fn_name) + patterns = [ + rf"^function {name}\(\) \{{ \[native code\] \}}$", + rf"^function get {name}\(\) \{{ \[native code\] \}}$", + rf"^function {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$", + rf"^function get {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$", + ] + return any(_re.match(p, text) for p in patterns) + + +@pytest.mark.e2e +@pytest.mark.parametrize("native_fn,name", [ + ("Function.prototype.toString", "toString"), + ("Function.prototype.bind", "bind"), + ("Function.prototype.call", "call"), + ("Function.prototype.apply", "apply"), + ("Object.getOwnPropertyDescriptor", "getOwnPropertyDescriptor"), + ("Object.defineProperty", "defineProperty"), + ("Array.prototype.slice", "slice"), + ("JSON.stringify", "stringify"), +]) +def test_native_function_self_toString_matches(page, native_fn, name): + """Each native function's `.toString()` must match its engine's + native pattern. A Proxy wrapper or function-rewrite leaks here.""" + text = _ev(page, f"{native_fn}.toString()") + assert _is_native_toString(text, name), ( + f"{native_fn}.toString() not native-shape: {text!r}" + ) + + +# =========================================================================== +# 6. AudioContext / WebGL determinism +# =========================================================================== + + +@pytest.mark.e2e +def test_audio_offline_context_deterministic(page): + """OfflineAudioContext: same graph → byte-identical output.""" + ok = _ev(page, """async () => { + async function render() { + const ctx = new (window.OfflineAudioContext || + window.webkitOfflineAudioContext)(1, 5000, 44100); + const osc = ctx.createOscillator(); + osc.connect(ctx.destination); + osc.start(0); + const buf = await ctx.startRendering(); + return Array.from(buf.getChannelData(0).slice(0, 50)); + } + const a = await render(); + const b = await render(); + return JSON.stringify(a) === JSON.stringify(b); + }""") + assert ok + + +@pytest.mark.e2e +def test_webgl_getParameter_deterministic(page): + """WebGL parameters must not drift across reads.""" + ok = _ev(page, """() => { + const c = document.createElement('canvas'); + const gl = c.getContext('webgl'); + if (!gl) return false; + const params = [gl.MAX_TEXTURE_SIZE, gl.MAX_VIEWPORT_DIMS, + gl.MAX_RENDERBUFFER_SIZE, gl.MAX_VERTEX_ATTRIBS]; + const ref = JSON.stringify(params.map(p => gl.getParameter(p))); + for (let i = 0; i < 50; i++) { + if (JSON.stringify(params.map(p => gl.getParameter(p))) !== ref) { + return false; + } + } + return true; + }""") + assert ok + + +# =========================================================================== +# 7. Locale ↔ Intl cross-consistency +# =========================================================================== + + +@pytest.mark.e2e +def test_navigator_language_matches_Intl_locale(page): + """navigator.language base must agree with Intl.DateTimeFormat locale.""" + nav = _ev(page, "navigator.language").split("-")[0] + intl = _ev(page, + "Intl.DateTimeFormat().resolvedOptions().locale").split("-")[0] + assert nav == intl, ( + f"navigator.language base={nav!r} vs Intl={intl!r}" + ) + + +@pytest.mark.e2e +def test_navigator_language_matches_Intl_NumberFormat(page): + nav = _ev(page, "navigator.language").split("-")[0] + num = _ev(page, + "Intl.NumberFormat().resolvedOptions().locale").split("-")[0] + assert nav == num + + +@pytest.mark.e2e +def test_navigator_language_matches_Intl_Collator(page): + nav = _ev(page, "navigator.language").split("-")[0] + col = _ev(page, + "(new Intl.Collator()).resolvedOptions().locale").split("-")[0] + assert nav == col + + +# =========================================================================== +# 8. Property descriptor shape lies +# Spoofers using Object.defineProperty(navigator, prop, {value: ...}) +# leave a 'value' field on the descriptor — real native props use a getter. +# =========================================================================== + + +_DESCRIPTOR_NATIVE_PROPS = [ + "userAgent", "platform", "hardwareConcurrency", "language", "languages", + "vendor", "appVersion", "appName", "appCodeName", "doNotTrack", + "cookieEnabled", "onLine", "product", "productSub", "buildID", "oscpu", +] + + +@pytest.mark.e2e +@pytest.mark.parametrize("prop", _DESCRIPTOR_NATIVE_PROPS) +def test_navigator_property_descriptor_is_getter_not_value(page, prop): + """Each spoofable navigator.* property must be defined via a native + getter — NOT Object.defineProperty(..., {value: x}). The value-field + descriptor is the lazy spoof leak CreepJS catches.""" + has_lie = _ev(page, f"""() => {{ + let proto = navigator; + let descriptor = null; + while (proto && !descriptor) {{ + descriptor = Object.getOwnPropertyDescriptor(proto, {prop!r}); + proto = Object.getPrototypeOf(proto); + }} + if (!descriptor) return null; + return 'value' in descriptor; + }}""") + if has_lie is None: + pytest.skip(f"navigator.{prop} not exposed") + assert has_lie is False, ( + f"navigator.{prop} descriptor exposes 'value' field — lazy spoof" + ) + + +# =========================================================================== +# 9. performance.timeOrigin + monotonic +# =========================================================================== + + +@pytest.mark.e2e +def test_performance_timeOrigin_stable(page): + assert _ev(page, + "performance.timeOrigin === performance.timeOrigin") + + +@pytest.mark.e2e +def test_performance_now_monotonic(page): + ok = _ev(page, """() => { + let prev = performance.now(); + for (let i = 0; i < 100; i++) { + const cur = performance.now(); + if (cur < prev) return false; + prev = cur; + } + return true; + }""") + assert ok + + +# =========================================================================== +# 10. Window dimension invariants +# =========================================================================== + + +@pytest.mark.e2e +def test_window_inner_not_larger_than_outer(page): + inner, outer = _ev(page, "[window.innerWidth, window.outerWidth]") + assert inner <= outer + + +@pytest.mark.e2e +def test_screen_avail_not_larger_than_screen(page): + aw, w = _ev(page, "[screen.availWidth, screen.width]") + ah, h = _ev(page, "[screen.availHeight, screen.height]") + assert aw <= w and ah <= h + + +# =========================================================================== +# 11. Firefox UA invariants +# =========================================================================== + + +@pytest.mark.e2e +def test_firefox_UA_implies_empty_vendor(page): + """Firefox: navigator.vendor === ''""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + if "Chrome" in _ev(page, "navigator.userAgent"): + pytest.skip("Chrome+Firefox UA — likely synthetic") + assert _ev(page, "navigator.vendor") == "" + + +@pytest.mark.e2e +def test_firefox_appVersion_short_form(page): + """Real Firefox's appVersion is '5.0 (Windows)' form, not the full UA.""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + av = _ev(page, "navigator.appVersion") + ua = _ev(page, "navigator.userAgent") + assert av.startswith("5.0 (") + assert len(av) < len(ua) + + +@pytest.mark.e2e +def test_firefox_UA_implies_appName_Netscape(page): + """navigator.appName === 'Netscape' (historical invariant).""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + assert _ev(page, "navigator.appName") == "Netscape" diff --git a/tests/test_fingerprint_surface.py b/tests/test_fingerprint_surface.py new file mode 100644 index 0000000..1de5d39 --- /dev/null +++ b/tests/test_fingerprint_surface.py @@ -0,0 +1,238 @@ +"""Fingerprint surface tests — replicate the checks performed by the canonical +anti-bot detection libraries against an OFFLINE browser session. + +Each test asserts the SAME thing the upstream detector would flag. A pass +here means our patched build appears human to that detector; a fail +means a real stealth hole that anti-bot kits would exploit in production. + +Detector libraries studied (all FOSS, MIT-licensed): + - github.com/fingerprintjs/BotD — 19 detectors, the most + widely deployed client-side + bot detector + - github.com/abrahamjuliot/creepjs — headless / stealth / lies + modules + - github.com/fingerprintjs/fingerprintjs — canvas / audio / color / + touch consistency + - github.com/antoinevastel/fpscanner — UA / platform / oscpu + cross-checks + - bot.sannysoft.com — classic Puppeteer harness + +Everything runs against `about:blank` with NO network and NO proxy. The +suite is intended to be part of the release-gate: pre-push hook runs +`pytest -m e2e` and these tests must be green on every release. + +Run only this file: + pytest tests/test_fingerprint_surface.py -m e2e -v +""" +from __future__ import annotations + +import re + +import pytest + +from invisible_playwright import InvisiblePlaywright + + +# ──────────────────────────────────────────────────────────────────── +# Inline PIN — a coherent mid-range Windows desktop. Not user-config: +# these specific values are what the surface tests assert against. +# Keep PIN small (only fields that JS exposes) and stable across runs. +# ──────────────────────────────────────────────────────────────────── + +PIN = { + "screen.width": 1920, + "screen.height": 1080, + "screen.avail_width": 1920, + "screen.avail_height": 1040, + "screen.dpr": 1.0, + "hardware.concurrency": 8, + "audio.sample_rate": 48000, + "audio.max_channel_count": 2, +} + + +@pytest.fixture(scope="module") +def page(firefox_binary): + """One headless browser shared across the whole module. + ~20s startup paid once, then every test runs in ~50ms.""" + with InvisiblePlaywright( + seed=42, + pin=PIN, + binary_path=firefox_binary, + headless=True, + ) as browser: + ctx = browser.new_context() + p = ctx.new_page() + p.goto("about:blank", timeout=30_000) + yield p + + +def _ev(page, expr): + return page.evaluate(expr) + + +# =========================================================================== +# sannysoft.com — classic Puppeteer detection harness +# =========================================================================== + + +@pytest.mark.e2e +def test_sannysoft_chrome_object_consistency(page): + """Firefox UA + window.chrome present = bot-framework leak.""" + if "Firefox" in _ev(page, "navigator.userAgent"): + assert not _ev(page, "typeof window.chrome !== 'undefined'") + + +@pytest.mark.e2e +def test_sannysoft_permissions_query_works(page): + """navigator.permissions.query() must return a proper PermissionStatus.""" + ok = _ev(page, """async () => { + if (!navigator.permissions || !navigator.permissions.query) return false; + try { + const r = await navigator.permissions.query({name: 'notifications'}); + return r && typeof r.state === 'string'; + } catch (e) { return false; } + }""") + assert ok + + +@pytest.mark.e2e +def test_sannysoft_iframe_chrome_not_leaked(page): + """iframe.contentWindow.chrome must not leak on Firefox UA.""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + leaks = _ev(page, """() => { + const iframe = document.createElement('iframe'); + iframe.style.display = 'none'; + document.body.appendChild(iframe); + const is = typeof iframe.contentWindow.chrome !== 'undefined'; + document.body.removeChild(iframe); + return is; + }""") + assert not leaks + + +@pytest.mark.e2e +def test_sannysoft_iframe_languages_not_empty(page): + """Iframe-scope navigator.languages must have ≥1 entry.""" + n = _ev(page, """() => { + const f = document.createElement('iframe'); + f.style.display = 'none'; + document.body.appendChild(f); + const len = f.contentWindow.navigator.languages.length; + document.body.removeChild(f); + return len; + }""") + assert n > 0 + + +# =========================================================================== +# FingerprintJS — fingerprint surface coherence +# =========================================================================== + + +@pytest.mark.e2e +def test_fpjs_canvas_2d_context_returns_valid(page): + ok = _ev(page, """() => { + const c = document.createElement('canvas'); + c.width = 100; c.height = 100; + const ctx = c.getContext('2d'); + if (!ctx) return false; + ctx.fillText('test', 10, 10); + const data = c.toDataURL(); + return data.length > 100 && data.startsWith('data:image/png;base64'); + }""") + assert ok + + +@pytest.mark.e2e +def test_fpjs_audio_context_works(page): + ok = _ev(page, """async () => { + try { + const ctx = new (window.OfflineAudioContext || + window.webkitOfflineAudioContext)(1, 5000, 44100); + const osc = ctx.createOscillator(); + osc.connect(ctx.destination); + osc.start(0); + const buf = await ctx.startRendering(); + return buf && buf.length > 0; + } catch (e) { return false; } + }""") + assert ok + + +@pytest.mark.e2e +def test_fpjs_color_gamut_query_works(page): + """matchMedia('(color-gamut: ...)') must match at least srgb.""" + ok = _ev(page, """matchMedia('(color-gamut: srgb)').matches || + matchMedia('(color-gamut: p3)').matches || + matchMedia('(color-gamut: rec2020)').matches""") + assert ok + + +@pytest.mark.e2e +def test_fpjs_screen_color_depth_realistic(page): + """Atypical color depths are headless-distinctive.""" + cd = _ev(page, "screen.colorDepth") + assert cd in (24, 30, 32) + + +# =========================================================================== +# PIN-locked surfaces (the values declared in PIN above) +# =========================================================================== + + +@pytest.mark.e2e +def test_pin_screen_width_lands_in_screen_object(page): + assert _ev(page, "screen.width") == PIN["screen.width"] + + +@pytest.mark.e2e +def test_pin_screen_height_lands_in_screen_object(page): + assert _ev(page, "screen.height") == PIN["screen.height"] + + +@pytest.mark.e2e +def test_pin_hardware_concurrency_lands_in_navigator(page): + assert (_ev(page, "navigator.hardwareConcurrency") + == PIN["hardware.concurrency"]) + + +@pytest.mark.e2e +def test_pin_audio_sample_rate_lands_in_AudioContext(page): + assert _ev(page, + "(new (window.AudioContext||window.webkitAudioContext)()).sampleRate" + ) == PIN["audio.sample_rate"] + + +@pytest.mark.e2e +def test_pin_audio_max_channels_lands_in_destination(page): + assert _ev(page, + "(new (window.AudioContext||window.webkitAudioContext)())" + ".destination.maxChannelCount" + ) == PIN["audio.max_channel_count"] + + +# =========================================================================== +# fpscanner-style cross-checks +# =========================================================================== + + +@pytest.mark.e2e +def test_fpscanner_ua_vs_platform_consistent(page): + """UA OS substring must agree with navigator.platform OS substring.""" + ua = _ev(page, "navigator.userAgent") + platform = _ev(page, "navigator.platform") + if "Windows" in ua: + assert "Win" in platform, f"UA Win but platform={platform!r}" + elif "Mac" in ua: + assert "Mac" in platform + elif "Linux" in ua: + assert "Linux" in platform or "X11" in platform + + +@pytest.mark.e2e +def test_fpscanner_no_userAgentData_on_firefox(page): + """navigator.userAgentData is Chromium-only. Presence on Firefox UA = bot.""" + if "Firefox" in _ev(page, "navigator.userAgent"): + assert not _ev(page, "'userAgentData' in navigator") diff --git a/tests/test_geo.py b/tests/test_geo.py new file mode 100644 index 0000000..39ef5ee --- /dev/null +++ b/tests/test_geo.py @@ -0,0 +1,288 @@ +"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution). + +Covers: the precedence policy (resolve_session_timezone), proxy→requests +translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked +mmdb). No real network or mmdb is touched. +""" +import sys +import types + +import pytest + +from invisible_playwright import _geo +from invisible_playwright._geo import ( + GeoTimezoneError, + _proxies_for_requests, + _proxy_is_set, + discover_egress_ip, + ip_to_timezone, + resolve_session_timezone, +) + +SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"} +HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"} + + +# ────────────────────────────────────────────────────────────────────── +# _proxy_is_set +# ────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +@pytest.mark.parametrize( + "proxy,expected", + [ + (None, False), + ({}, False), + ({"server": ""}, False), + ({"server": " "}, False), + ({"server": "direct://"}, False), + ({"server": "DIRECT://"}, False), + ({"server": "socks5://h:1"}, True), + ({"server": "http://h:8080"}, True), + ], +) +def test_proxy_is_set(proxy, expected): + assert _proxy_is_set(proxy) is expected + + +# ────────────────────────────────────────────────────────────────────── +# _proxies_for_requests — scheme + credential translation +# ────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_proxies_socks5_uses_socks5h_remote_dns(): + out = _proxies_for_requests(SOCKS) + assert out["http"] == "socks5h://u:p@gw.example:1080" + assert out["https"] == out["http"] + + +@pytest.mark.unit +def test_proxies_socks4_scheme(): + out = _proxies_for_requests({"server": "socks4://gw:1080"}) + assert out["http"] == "socks4://gw:1080" + + +@pytest.mark.unit +def test_proxies_http_and_https_schemes(): + assert _proxies_for_requests(HTTP)["http"] == "http://u:p@gw.example:8080" + out = _proxies_for_requests({"server": "https://gw:8443"}) + assert out["https"] == "https://gw:8443" + + +@pytest.mark.unit +def test_proxies_no_scheme_defaults_to_http(): + out = _proxies_for_requests({"server": "gw.example:3128"}) + assert out["http"] == "http://gw.example:3128" + + +@pytest.mark.unit +def test_proxies_credentials_are_url_encoded(): + out = _proxies_for_requests( + {"server": "socks5://gw:1080", "username": "user@x", "password": "p:w/d"} + ) + # '@', ':' and '/' in creds must be percent-encoded so they don't break + # the proxy URL parsing. + assert "user%40x:p%3Aw%2Fd@gw:1080" in out["http"] + + +@pytest.mark.unit +def test_proxies_no_credentials_has_no_auth_prefix(): + out = _proxies_for_requests({"server": "socks5://gw:1080"}) + assert out["http"] == "socks5h://gw:1080" + + +# ────────────────────────────────────────────────────────────────────── +# discover_egress_ip — mocked requests +# ────────────────────────────────────────────────────────────────────── +class _FakeResp: + def __init__(self, text, status=200): + self.text = text + self._status = status + + def raise_for_status(self): + if self._status >= 400: + raise RuntimeError(f"HTTP {self._status}") + + +@pytest.mark.unit +def test_discover_egress_ip_first_endpoint_wins(monkeypatch): + calls = [] + + def fake_get(url, **kw): + calls.append(url) + return _FakeResp("203.0.113.7\n") + + monkeypatch.setattr(_geo.requests, "get", fake_get) + assert discover_egress_ip(SOCKS) == "203.0.113.7" + assert len(calls) == 1 # stopped at the first success + + +@pytest.mark.unit +def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch): + seq = iter([_FakeResp("junk-not-an-ip"), _FakeResp("198.51.100.42")]) + + def fake_get(url, **kw): + return next(seq) + + monkeypatch.setattr(_geo.requests, "get", fake_get) + assert discover_egress_ip(HTTP) == "198.51.100.42" + + +@pytest.mark.unit +def test_discover_egress_ip_all_fail_raises(monkeypatch): + def fake_get(url, **kw): + raise OSError("connection refused") + + monkeypatch.setattr(_geo.requests, "get", fake_get) + with pytest.raises(GeoTimezoneError): + discover_egress_ip(SOCKS) + + +@pytest.mark.unit +def test_discover_egress_ip_no_proxy_is_direct(monkeypatch): + # proxy=None → direct request, requests.get must get proxies=None. + seen = {} + + def fake_get(url, **kw): + seen["proxies"] = kw.get("proxies", "MISSING") + return _FakeResp("192.0.2.55") + + monkeypatch.setattr(_geo.requests, "get", fake_get) + assert discover_egress_ip(None) == "192.0.2.55" + assert seen["proxies"] is None + + +# ────────────────────────────────────────────────────────────────────── +# ip_to_timezone — mocked mmdb reader +# ────────────────────────────────────────────────────────────────────── +class _FakeReader: + def __init__(self, record): + self._record = record + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def get(self, ip): + return self._record + + +def _install_fake_maxminddb(monkeypatch, record): + mod = types.ModuleType("maxminddb") + mod.open_database = lambda path: _FakeReader(record) + monkeypatch.setitem(sys.modules, "maxminddb", mod) + + +@pytest.mark.unit +def test_ip_to_timezone_reads_location_time_zone(monkeypatch): + _install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Europe/Rome"}}) + assert ip_to_timezone("1.2.3.4", "x.mmdb") == "Europe/Rome" + + +@pytest.mark.unit +def test_ip_to_timezone_ip_absent_raises(monkeypatch): + _install_fake_maxminddb(monkeypatch, None) + with pytest.raises(GeoTimezoneError): + ip_to_timezone("1.2.3.4", "x.mmdb") + + +@pytest.mark.unit +def test_ip_to_timezone_missing_zone_raises(monkeypatch): + _install_fake_maxminddb(monkeypatch, {"location": {}}) + with pytest.raises(GeoTimezoneError): + ip_to_timezone("1.2.3.4", "x.mmdb") + + +@pytest.mark.unit +def test_ip_to_timezone_invalid_iana_raises(monkeypatch): + _install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Not/AZone"}}) + with pytest.raises(GeoTimezoneError): + ip_to_timezone("1.2.3.4", "x.mmdb") + + +# ────────────────────────────────────────────────────────────────────── +# resolve_session_timezone — the precedence policy +# ────────────────────────────────────────────────────────────────────── +@pytest.fixture +def stub_egress(monkeypatch): + """Make egress resolution deterministic + offline; record if it ran.""" + state = {"called": False} + + def fake_discover(proxy=None, **kw): + state["called"] = True + state["proxy_arg"] = proxy + return "203.0.113.7" + + monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover) + monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "America/New_York") + # ensure_geoip_mmdb is imported from .download at call time + import invisible_playwright.download as dl + + monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb") + return state + + +@pytest.mark.unit +def test_resolve_explicit_iana_wins(stub_egress): + # An explicit zone wins and never triggers resolution (proxy or not). + assert resolve_session_timezone("Asia/Tokyo", SOCKS) == "Asia/Tokyo" + assert resolve_session_timezone("Asia/Tokyo", None) == "Asia/Tokyo" + assert stub_egress["called"] is False + + +@pytest.mark.unit +def test_resolve_empty_with_proxy_resolves_from_proxy(stub_egress): + assert resolve_session_timezone("", SOCKS) == "America/New_York" + assert stub_egress["called"] is True + assert stub_egress["proxy_arg"] == SOCKS # routed through the proxy + + +@pytest.mark.unit +def test_resolve_auto_with_proxy_resolves_from_proxy(stub_egress): + assert resolve_session_timezone("auto", HTTP) == "America/New_York" + assert stub_egress["proxy_arg"] == HTTP + + +@pytest.mark.unit +def test_resolve_empty_no_proxy_resolves_from_host(stub_egress): + # auto ALWAYS resolves — without a proxy, from the host's own public IP. + assert resolve_session_timezone("", None) == "America/New_York" + assert stub_egress["called"] is True + assert stub_egress["proxy_arg"] is None # direct request, no proxy + + +@pytest.mark.unit +def test_resolve_auto_no_proxy_resolves_from_host(stub_egress): + assert resolve_session_timezone("auto", None) == "America/New_York" + assert stub_egress["proxy_arg"] is None + + +@pytest.mark.unit +def test_resolve_direct_proxy_resolves_via_host(stub_egress): + # direct:// counts as "no proxy" → resolve from the host IP, don't skip. + assert resolve_session_timezone("auto", {"server": "direct://"}) == "America/New_York" + assert stub_egress["proxy_arg"] is None + + +@pytest.mark.unit +def test_resolve_no_proxy_failure_falls_back_to_host(monkeypatch): + # Without a proxy, a lookup failure must NOT break the launch → host TZ (""). + def boom(proxy=None, **kw): + raise GeoTimezoneError("offline") + + monkeypatch.setattr(_geo, "discover_egress_ip", boom) + assert resolve_session_timezone("auto", None) == "" + assert resolve_session_timezone("", None) == "" + + +@pytest.mark.unit +def test_resolve_proxy_failure_raises(monkeypatch): + # With a proxy set, a failure must raise — never a silent host-TZ fallback. + def boom(proxy=None, **kw): + raise GeoTimezoneError("no egress") + + monkeypatch.setattr(_geo, "discover_egress_ip", boom) + with pytest.raises(GeoTimezoneError): + resolve_session_timezone("auto", SOCKS) + with pytest.raises(GeoTimezoneError): + resolve_session_timezone("", SOCKS) diff --git a/tests/test_geoip_update.py b/tests/test_geoip_update.py new file mode 100644 index 0000000..26632b7 --- /dev/null +++ b/tests/test_geoip_update.py @@ -0,0 +1,131 @@ +"""Unit tests for the intelligent geoip mmdb auto-update in `download.py`. + +daijro/geoip-all-in-one rebuilds weekly; `ensure_geoip_mmdb` keeps the cache +fresh without a download (or API call) on every launch. These tests mock the +cache root, the latest-tag API, and the per-tag download so nothing touches the +network. +""" +import os +import time + +import pytest + +import invisible_playwright.download as dl + + +@pytest.fixture +def cache(tmp_path, monkeypatch): + """Point the cache at tmp_path and clear the env override.""" + monkeypatch.setattr(dl, "cache_root", lambda: tmp_path) + monkeypatch.delenv("STEALTHFOX_GEOIP_MMDB", raising=False) + return tmp_path + + +def _make_cached(root, tag, name=dl.GEOIP_MMDB_NAME): + d = root / "geoip" / tag + d.mkdir(parents=True, exist_ok=True) + f = d / name + f.write_bytes(b"FAKE-MMDB") + return f + + +def _set_marker_age(root, days): + m = root / "geoip" / ".last_check" + m.parent.mkdir(parents=True, exist_ok=True) + m.touch() + old = time.time() - days * 86400 + os.utime(m, (old, old)) + + +# ────────────────────────────────────────────────────────────────────── +# env override +# ────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_env_override_returns_file(tmp_path, monkeypatch): + f = tmp_path / "mine.mmdb" + f.write_bytes(b"X") + monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(f)) + assert dl.ensure_geoip_mmdb() == f + + +@pytest.mark.unit +def test_env_override_missing_raises(tmp_path, monkeypatch): + monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(tmp_path / "nope.mmdb")) + with pytest.raises(RuntimeError): + dl.ensure_geoip_mmdb() + + +# ────────────────────────────────────────────────────────────────────── +# freshness window +# ────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_fresh_cache_no_network(cache, monkeypatch): + f = _make_cached(cache, "2026.06.03") + _set_marker_age(cache, 0) # just checked + + def boom(): + raise AssertionError("latest-tag API must NOT be called within the window") + + monkeypatch.setattr(dl, "_latest_geoip_tag", boom) + assert dl.ensure_geoip_mmdb(max_age_days=7) == f + + +@pytest.mark.unit +def test_stale_same_tag_no_download(cache, monkeypatch): + f = _make_cached(cache, "2026.06.03") + _set_marker_age(cache, 30) # stale → will re-check + monkeypatch.setattr(dl, "_latest_geoip_tag", lambda: "2026.06.03") + # real _download_geoip_tag runs but target exists, so no actual download: + monkeypatch.setattr(dl, "_download_file", lambda *a, **k: (_ for _ in ()).throw( + AssertionError("must not download when tag already cached"))) + assert dl.ensure_geoip_mmdb(max_age_days=7) == f + + +@pytest.mark.unit +def test_stale_new_tag_downloads_and_prunes(cache, monkeypatch): + old = _make_cached(cache, "2026.06.03") + _set_marker_age(cache, 30) + monkeypatch.setattr(dl, "_latest_geoip_tag", lambda: "2026.06.10") + + def fake_download(tag): + return _make_cached(cache, tag) # simulate fetch+extract of the new tag + + monkeypatch.setattr(dl, "_download_geoip_tag", fake_download) + got = dl.ensure_geoip_mmdb(max_age_days=7) + assert got.parent.name == "2026.06.10" + assert not old.parent.exists() # old tag pruned + assert got.exists() + + +# ────────────────────────────────────────────────────────────────────── +# offline resilience +# ────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_api_down_with_cache_uses_cache(cache, monkeypatch): + f = _make_cached(cache, "2026.06.03") + _set_marker_age(cache, 30) + + def boom(): + raise OSError("offline") + + monkeypatch.setattr(dl, "_latest_geoip_tag", boom) + assert dl.ensure_geoip_mmdb(max_age_days=7) == f # stale cache reused, no raise + + +@pytest.mark.unit +def test_cold_cache_api_down_falls_back_to_pinned(cache, monkeypatch): + # no cache at all + API unreachable → pinned GEOIP_MMDB_VERSION fallback. + def boom(): + raise OSError("offline") + + monkeypatch.setattr(dl, "_latest_geoip_tag", boom) + captured = {} + + def fake_download(tag): + captured["tag"] = tag + return _make_cached(cache, tag) + + monkeypatch.setattr(dl, "_download_geoip_tag", fake_download) + got = dl.ensure_geoip_mmdb(max_age_days=7) + assert captured["tag"] == dl.GEOIP_MMDB_VERSION + assert got.exists() diff --git a/tests/test_launcher_config.py b/tests/test_launcher_config.py index daf88c4..85047e5 100644 --- a/tests/test_launcher_config.py +++ b/tests/test_launcher_config.py @@ -55,3 +55,217 @@ def test_invisible_playwright_constructs_without_launching(): assert obj is not None obj2 = InvisiblePlaywright(seed=42, headless=True) assert obj2 is not None + + +# ─── profile_dir kwarg — persistent context support ─────────────────────── # + +import pytest +from pathlib import Path + + +@pytest.mark.unit +def test_profile_dir_none_by_default(): + """No persistent profile unless explicitly opted in. Prevents accidental + state-leak between scripts that share the same seed.""" + obj = InvisiblePlaywright(seed=42) + assert obj._profile_dir is None + assert obj._persistent_context is None + + +@pytest.mark.unit +def test_profile_dir_string_is_coerced_to_path(tmp_path): + """Accept str or Path. Always store as Path internally.""" + obj = InvisiblePlaywright(seed=42, profile_dir=str(tmp_path)) + assert isinstance(obj._profile_dir, Path) + assert obj._profile_dir == tmp_path + + +@pytest.mark.unit +def test_profile_dir_path_is_stored_as_is(tmp_path): + obj = InvisiblePlaywright(seed=42, profile_dir=tmp_path) + assert obj._profile_dir == tmp_path + + +@pytest.mark.unit +def test_profile_dir_does_not_create_dir_until_enter(tmp_path): + """Construction must not touch the filesystem. Directory creation only + happens when the user actually enters the context manager — otherwise + a typo at instantiation would silently spawn dirs.""" + target = tmp_path / "nonexistent" + assert not target.exists() + InvisiblePlaywright(seed=42, profile_dir=target) + assert not target.exists() + + +@pytest.mark.unit +def test_persistent_context_kwargs_match_default_exactly(): + """Persistent kwargs must be IDENTICAL to non-persistent default + kwargs. From firefox-5 (C7 closure) the docShell.overrideTimezone + method is present in the patched binary, so the per-realm overrides + Playwright applies for `locale=`/`timezone_id=` land successfully and + no longer hang the persistent context launch handshake. + + Before firefox-5 we had to filter these out (180s timeout otherwise). + A future refactor that re-introduces that filter would silently lose + timezone/locale isolation in persistent sessions — this test is the + sentinel that catches the regression at the unit level.""" + obj = InvisiblePlaywright(seed=42, locale="en-GB", timezone="Europe/London", + profile_dir="/tmp/x") + persistent = obj._persistent_context_kwargs() + default = obj._default_context_kwargs() + assert persistent == default, ( + "persistent_context kwargs must match default_context kwargs since " + f"firefox-5.\n persistent: {persistent!r}\n default: {default!r}" + ) + + +@pytest.mark.unit +def test_persistent_context_kwargs_INCLUDES_locale_and_timezone(): + """Sentinel for the C7 closure: firefox-5 ships the C++ overrideTimezone + IDL method, so locale + timezone_id MUST be passed through to + launch_persistent_context. If they're not, the wrapper is silently + dropping per-context isolation — two sessions with different + `timezone=` would end up sharing whatever TZ the env var set. + + Regression-defense: do NOT re-add the firefox-4-era filter.""" + obj = InvisiblePlaywright(seed=42, locale="en-GB", timezone="Europe/London", + profile_dir="/tmp/x") + kw = obj._persistent_context_kwargs() + assert kw.get("locale") == "en-GB", ( + f"locale must be in persistent kwargs (firefox-5+ supports it via " + f"docShell.languageOverride). Got: {kw.get('locale')!r}" + ) + assert kw.get("timezone_id") == "Europe/London", ( + f"timezone_id must be in persistent kwargs (firefox-5+ supports it " + f"via docShell.overrideTimezone IDL method, patch.md section 19). " + f"Got: {kw.get('timezone_id')!r}" + ) + + +@pytest.mark.unit +def test_persistent_context_kwargs_omits_timezone_when_empty_string(): + """Empty timezone='' is the 'use host TZ' sentinel — must NOT pass + timezone_id to Playwright in that case (would pin to literal '' and + break Intl).""" + obj = InvisiblePlaywright(seed=42, timezone="", profile_dir="/tmp/x") + kw = obj._persistent_context_kwargs() + assert "timezone_id" not in kw + + +# ─── Mocked __enter__ flow — confirms the right Playwright call is made ── # + + +@pytest.mark.unit +def test_enter_with_profile_dir_calls_launch_persistent_context(tmp_path, monkeypatch): + """When profile_dir is set, __enter__ must call + `firefox.launch_persistent_context(user_data_dir=...)` and NOT + `firefox.launch(...)`. This is the structural test that the persistent + branch is wired correctly — without it, profile_dir would be silently + accepted but ignored.""" + from unittest.mock import MagicMock + # Mock ensure_binary so we don't hit the network + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + + # Mock sync_playwright().start() → fake playwright with our recording firefox + fake_ctx = MagicMock(name="persistent_context") + fake_firefox = MagicMock() + fake_firefox.launch_persistent_context.return_value = fake_ctx + fake_playwright = MagicMock() + fake_playwright.firefox = fake_firefox + fake_pw = MagicMock() + fake_pw.start.return_value = fake_playwright + + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + profile = tmp_path / "myprofile" + obj = InvisiblePlaywright(seed=42, profile_dir=profile) + returned = obj.__enter__() + + # The persistent branch was taken + fake_firefox.launch_persistent_context.assert_called_once() + fake_firefox.launch.assert_not_called() + + # The user_data_dir was passed verbatim + call_kwargs = fake_firefox.launch_persistent_context.call_args.kwargs + assert call_kwargs["user_data_dir"] == str(profile) + + # The directory was created on disk (Playwright fails otherwise) + assert profile.exists() and profile.is_dir() + + # __enter__ returned the BrowserContext, not a Browser + assert returned is fake_ctx + + +@pytest.mark.unit +def test_enter_without_profile_dir_calls_launch_not_persistent(tmp_path, monkeypatch): + """Default path: profile_dir=None → firefox.launch, not + launch_persistent_context. Sentinel that the non-persistent flow + isn't accidentally rerouted.""" + from unittest.mock import MagicMock + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + + fake_browser = MagicMock(name="browser") + fake_browser.new_context = MagicMock() + fake_firefox = MagicMock() + fake_firefox.launch.return_value = fake_browser + fake_playwright = MagicMock() + fake_playwright.firefox = fake_firefox + fake_pw = MagicMock() + fake_pw.start.return_value = fake_playwright + + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + obj = InvisiblePlaywright(seed=42) + returned = obj.__enter__() + + fake_firefox.launch.assert_called_once() + fake_firefox.launch_persistent_context.assert_not_called() + assert returned is fake_browser + + +@pytest.mark.unit +def test_persistent_context_user_data_dir_is_created_if_missing(tmp_path, monkeypatch): + """First-run scenario: the directory the user names doesn't exist yet. + __enter__ must mkdir -p it (Playwright won't, and would crash with + 'user_data_dir does not exist').""" + from unittest.mock import MagicMock + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + fake_pw = MagicMock() + fake_pw.start.return_value = MagicMock() + fake_pw.start.return_value.firefox.launch_persistent_context = MagicMock( + return_value=MagicMock() + ) + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + nested = tmp_path / "a" / "b" / "c" / "profile" + assert not nested.parent.exists() # parent doesn't exist either + obj = InvisiblePlaywright(seed=42, profile_dir=nested) + obj.__enter__() + assert nested.is_dir() + + +@pytest.mark.unit +def test_teardown_closes_persistent_context(tmp_path, monkeypatch): + """The teardown must close the persistent context. Forgetting this + leaves Firefox + Playwright running until the parent process exits, + which on long-running tools (job orchestrators, MCP servers) leaks + handles indefinitely.""" + from unittest.mock import MagicMock + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + fake_ctx = MagicMock(name="persistent_context") + fake_pw = MagicMock() + fake_pw.start.return_value.firefox.launch_persistent_context.return_value = fake_ctx + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + obj = InvisiblePlaywright(seed=42, profile_dir=tmp_path / "p") + obj.__enter__() + obj.__exit__(None, None, None) + fake_ctx.close.assert_called_once() diff --git a/tests/test_mouse.py b/tests/test_mouse.py index ad0f00e..ae57486 100644 --- a/tests/test_mouse.py +++ b/tests/test_mouse.py @@ -16,24 +16,11 @@ and covers each patched call site: """ from __future__ import annotations -import sys import urllib.parse import pytest from invisible_playwright import InvisiblePlaywright -from invisible_playwright.constants import BINARY_ENTRY_REL - - -@pytest.fixture(scope="session") -def firefox_binary(): - if sys.platform not in BINARY_ENTRY_REL: - pytest.skip(f"unsupported platform: {sys.platform}") - from invisible_playwright.download import cache_dir_for_version - entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform] - if not entry.exists(): - pytest.skip("patched Firefox binary not cached; run `invisible-playwright fetch`") - return str(entry) def _data_url(html: str) -> str: @@ -195,7 +182,11 @@ def test_hover_triggers_mouseenter(firefox_binary): "onmouseenter=\"window.__h=true\">x" )) page.locator("#h").hover() - assert page.evaluate("window.__h") is True + # Wait for the event rather than reading immediately: under load / on a + # virtual display the mouseenter can land a beat after hover() returns, + # which made an instant read flaky. wait_for_function still fails (times + # out) if mouseenter genuinely never fires. + page.wait_for_function("() => window.__h === true", timeout=5000) # ──────────────────────────────────────────────────────────────────── diff --git a/tests/test_proxy_socks_auth_e2e.py b/tests/test_proxy_socks_auth_e2e.py new file mode 100644 index 0000000..2d8fafa --- /dev/null +++ b/tests/test_proxy_socks_auth_e2e.py @@ -0,0 +1,197 @@ +"""E2E: the patched Firefox SENDS SOCKS5 username/password and routes through it. + +Playwright's own ``proxy=`` ignores SOCKS auth; this is the patched +``nsProtocolProxyService`` feature (reads ``network.proxy.socks_username`` / +``socks_password``). ``test_proxy.py`` already unit-tests on CI that the wrapper +sets those prefs; this proves the binary actually performs the RFC1929 auth +handshake and relays traffic. + +Fully hermetic — a local SOCKS5 server + a local HTTP target, with the localhost +target forced through the proxy via ``allow_hijacking_localhost`` — so it runs +identically on a dev box and on a GitHub runner (no external site, no secrets). +""" +from __future__ import annotations + +import http.server +import socket +import socketserver +import struct +import threading + +import pytest + +from invisible_playwright import InvisiblePlaywright + +_USER = "ferd_socks_user" +_PASS = "ferd_socks_pw_42" + + +class _Socks5AuthRecorder: + """SOCKS5 that REQUIRES RFC1929 user/pass auth, records the creds it saw, + then relays CONNECT to the requested target.""" + + def __init__(self): + self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self._srv.bind(("127.0.0.1", 0)) + self._srv.listen(16) + self.port = self._srv.getsockname()[1] + self.seen_creds: list[tuple[str, str]] = [] + self._stop = False + threading.Thread(target=self._serve, daemon=True).start() + + def _serve(self): + while not self._stop: + try: + conn, _ = self._srv.accept() + except OSError: + break + threading.Thread(target=self._handle, args=(conn,), daemon=True).start() + + def _recv(self, s, n): + buf = b"" + while len(buf) < n: + chunk = s.recv(n - len(buf)) + if not chunk: + return None + buf += chunk + return buf + + def _handle(self, conn): + try: + head = self._recv(conn, 2) + if not head or head[0] != 0x05: + conn.close(); return + methods = self._recv(conn, head[1]) or b"" + if 0x02 not in methods: # we REQUIRE user/pass + conn.sendall(b"\x05\xff"); conn.close(); return + conn.sendall(b"\x05\x02") # select user/pass auth + if not self._recv(conn, 1): # RFC1929 version byte + conn.close(); return + ulen = self._recv(conn, 1)[0] + uname = (self._recv(conn, ulen) or b"").decode("utf-8", "ignore") + plen = self._recv(conn, 1)[0] + passwd = (self._recv(conn, plen) or b"").decode("utf-8", "ignore") + self.seen_creds.append((uname, passwd)) + conn.sendall(b"\x01\x00") # auth success + req = self._recv(conn, 4) + if not req: + conn.close(); return + _, cmd, _, atyp = req + if atyp == 0x01: + addr = socket.inet_ntoa(self._recv(conn, 4)) + elif atyp == 0x03: + addr = (self._recv(conn, self._recv(conn, 1)[0]) or b"").decode() + elif atyp == 0x04: + addr = socket.inet_ntop(socket.AF_INET6, self._recv(conn, 16)) + else: + conn.close(); return + port = struct.unpack("!H", self._recv(conn, 2))[0] + if cmd != 0x01: # only CONNECT + conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00"); conn.close(); return + try: + up = socket.create_connection((addr, port), timeout=15) + except OSError: + conn.sendall(b"\x05\x05\x00\x01\x00\x00\x00\x00\x00\x00"); conn.close(); return + conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00") + self._pipe(conn, up) + except Exception: + try: + conn.close() + except OSError: + pass + + @staticmethod + def _pipe(a, b): + def fwd(src, dst): + try: + while True: + data = src.recv(65536) + if not data: + break + dst.sendall(data) + except OSError: + pass + finally: + try: + dst.shutdown(socket.SHUT_WR) + except OSError: + pass + threading.Thread(target=fwd, args=(a, b), daemon=True).start() + fwd(b, a) + + def close(self): + self._stop = True + try: + self._srv.close() + except OSError: + pass + + +class _LocalHTTP: + """A tiny localhost HTTP server — the CONNECT target relayed by the proxy.""" + + _HTML = b"ok

socks-routed

" + + def __init__(self): + html = self._HTML + + class H(http.server.BaseHTTPRequestHandler): + def do_GET(self): # noqa: N802 + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(html))) + self.end_headers() + self.wfile.write(html) + + def log_message(self, *a): + pass + + self._srv = socketserver.TCPServer(("127.0.0.1", 0), H) + self.port = self._srv.server_address[1] + threading.Thread(target=self._srv.serve_forever, daemon=True).start() + + def close(self): + self._srv.shutdown() + + +@pytest.fixture +def socks_auth(): + s = _Socks5AuthRecorder() + yield s + s.close() + + +@pytest.fixture +def local_http(): + h = _LocalHTTP() + yield h + h.close() + + +@pytest.mark.e2e +def test_socks5_auth_creds_sent_and_routed(firefox_binary, socks_auth, local_http): + """The binary must perform SOCKS5 user/pass auth with the configured creds + and relay the page through the proxy.""" + proxy = { + "server": f"socks5://127.0.0.1:{socks_auth.port}", + "username": _USER, + "password": _PASS, + } + # Firefox bypasses the proxy for localhost by default; force it through. + prefs = { + "network.proxy.allow_hijacking_localhost": True, + "network.proxy.no_proxies_on": "", + } + with InvisiblePlaywright( + seed=42, binary_path=firefox_binary, proxy=proxy, extra_prefs=prefs + ) as browser: + page = browser.new_page() + page.goto(f"http://127.0.0.1:{local_http.port}/", wait_until="load", timeout=30000) + text = page.evaluate("() => document.getElementById('ok').textContent") + + assert text == "socks-routed", "page did not load through the SOCKS proxy" + assert (_USER, _PASS) in socks_auth.seen_creds, ( + f"patched Firefox did not send the SOCKS5 auth creds from prefs; " + f"proxy saw: {socks_auth.seen_creds!r}" + ) diff --git a/tests/test_recaptcha_seed.py b/tests/test_recaptcha_seed.py new file mode 100644 index 0000000..dbd1821 --- /dev/null +++ b/tests/test_recaptcha_seed.py @@ -0,0 +1,349 @@ +"""Unit tests for the deterministic reCAPTCHA cookie builder. + +Validates the contract: + - 6 .google.com cookies always present + - Per-site cookies built from a `browsing_history` list (sampled by the + Bayesian network in _fpforge) + - Determinism: same (seed, history) → identical content + - Chrome 400-day cookie cap respected + - Playwright add_cookies field requirements satisfied +""" +import pytest + +from invisible_playwright._recaptcha_seed import ( + build_cookies, + _sub_seed, +) + + +pytestmark = pytest.mark.unit + + +_FIXED_NOW = 1779600000 # 2026-05-23, frozen for determinism + + +# Sample browsing history for tests (mimics what _fpforge produces). +_SAMPLE_HISTORY = [ + {"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, + {"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"}, + {"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, + {"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"}, + {"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"}, +] + + +# =========================================================================== +# 1. Set composition +# =========================================================================== + +def test_only_google_cookies_when_no_history(): + """Empty/None history → only the 5 .google.com cookies (1P_JAR removed + in realism round 2 — deprecated by Google 2022).""" + cookies = build_cookies(seed=42, browsing_history=None, now=_FIXED_NOW) + names = sorted(c["name"] for c in cookies) + assert names == sorted(["NID", "CONSENT", "SOCS", + "_GRECAPTCHA", "ENID"]) + assert all(c["domain"] == ".google.com" for c in cookies) + + +def test_browsing_history_adds_host_cookies(): + """Each history site contributes 1+ cookies on its domain.""" + cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + google = [c for c in cookies if c["domain"] == ".google.com"] + assert len(google) == 5 # 1P_JAR removed + + domains = {c["domain"] for c in cookies if c["domain"] != ".google.com"} + for site in _SAMPLE_HISTORY: + assert f".{site['name']}" in domains + + +def test_domain_dot_prefix_normalized(): + """All host cookie domains have a leading dot for sub-domain coverage.""" + cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + for c in cookies: + assert c["domain"].startswith("."), f"missing dot: {c['domain']}" + + +# =========================================================================== +# 2. Cookie profile recipes (each profile yields the expected cookie set) +# =========================================================================== + +def test_profile_minimal_yields_ga_only(): + history = [{"name": "x.com", "cookie_profile": "minimal"}] + cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) + host = [c for c in cookies if c["domain"] == ".x.com"] + names = [c["name"] for c in host] + assert names == ["_ga"] + + +def test_profile_ga_only_yields_ga_and_gid(): + history = [{"name": "x.com", "cookie_profile": "ga_only"}] + cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) + host = [c for c in cookies if c["domain"] == ".x.com"] + names = sorted(c["name"] for c in host) + assert names == ["_ga", "_gid"] + + +def test_profile_ga_cf_yields_ga_and_cf_bm(): + history = [{"name": "x.com", "cookie_profile": "ga_cf"}] + cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) + host = [c for c in cookies if c["domain"] == ".x.com"] + names = sorted(c["name"] for c in host) + assert names == ["__cf_bm", "_ga"] + + +def test_profile_ga_consent_yields_three_cookies(): + history = [{"name": "x.com", "cookie_profile": "ga_consent"}] + cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) + host = [c for c in cookies if c["domain"] == ".x.com"] + names = sorted(c["name"] for c in host) + # Always _ga + _gid + one of OneTrust|CookieYes + assert "_ga" in names and "_gid" in names + assert any(n in names for n in ("OptanonAlertBoxClosed", "cookieyes-consent")) + assert len(host) == 3 + + +def test_profile_ga_consent_clarity_yields_at_least_four_cookies(): + """Always _ga + _gid + _clck + consent banner. Optionally _fbp, _dc_gtm_*, + __hssrc (probabilistic per rng — see test_new_helper_cookies_*).""" + history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}] + cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) + host = [c for c in cookies if c["domain"] == ".x.com"] + names = sorted(c["name"] for c in host) + assert "_ga" in names and "_gid" in names and "_clck" in names + assert any(n in names for n in ("OptanonAlertBoxClosed", "cookieyes-consent")) + assert len(host) >= 4 # 4 baseline + 0-3 helpers + + +def test_unknown_profile_falls_back_to_ga(): + history = [{"name": "x.com", "cookie_profile": "nonexistent_profile"}] + cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) + host = [c for c in cookies if c["domain"] == ".x.com"] + assert [c["name"] for c in host] == ["_ga"] + + +# =========================================================================== +# 3. Determinism +# =========================================================================== + +def test_same_seed_and_history_same_content(): + a = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + b = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + assert a == b + + +def test_different_seed_different_content(): + a = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + b = build_cookies(seed=99, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + a_nid = next(c for c in a if c["name"] == "NID")["value"] + b_nid = next(c for c in b if c["name"] == "NID")["value"] + assert a_nid != b_nid + + +def test_history_order_does_not_affect_domain_specific_cookies(): + """Sub-seed is keyed on domain name, not order in history list.""" + h1 = [_SAMPLE_HISTORY[0], _SAMPLE_HISTORY[1]] + h2 = [_SAMPLE_HISTORY[1], _SAMPLE_HISTORY[0]] + a = {(c["domain"], c["name"]): c["value"] + for c in build_cookies(seed=42, browsing_history=h1, now=_FIXED_NOW) + if c["domain"] != ".google.com"} + b = {(c["domain"], c["name"]): c["value"] + for c in build_cookies(seed=42, browsing_history=h2, now=_FIXED_NOW) + if c["domain"] != ".google.com"} + assert a == b + + +def test_sub_seed_distinct_tags_distinct_streams(): + assert _sub_seed(42, "google") != _sub_seed(42, "dom:github.com") + assert _sub_seed(42, "dom:github.com") != _sub_seed(42, "dom:amazon.com") + assert _sub_seed(0, "any") != 0 # seed=0 still produces non-zero sub-seed + + +# =========================================================================== +# 4. Format / structural correctness for the Google batch +# =========================================================================== + +def test_nid_format(): + cookies = build_cookies(seed=42, now=_FIXED_NOW) + nid = next(c for c in cookies if c["name"] == "NID") + prefix, b64 = nid["value"].split("=", 1) + assert prefix.isdigit() and len(prefix) == 3 + # Broadened to 100-540 in realism round 2 to cover historical NID versions + assert 100 <= int(prefix) <= 540 + assert len(b64) == 178 + + +def test_consent_format(): + cookies = build_cookies(seed=42, now=_FIXED_NOW) + consent = next(c for c in cookies if c["name"] == "CONSENT") + assert consent["value"].startswith("YES+cb.") + assert "+FX+" in consent["value"] + + +# =========================================================================== +# 5. Chrome 400-day cookie cap compliance +# =========================================================================== + +def test_all_expiries_within_400_day_cap(): + """Chrome 104+ caps cookie expiry to 400 days. Cookies > 400d silently + truncated / dropped. We tighten everything to <=395d (except __cf_bm + which is short-lived telemetry).""" + cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + max_allowed = _FIXED_NOW + 400 * 86400 + for c in cookies: + # Short-lived telemetry cookies are fine + if c["name"] in ("__cf_bm", "1P_JAR", "_gid"): + continue + assert c["expires"] <= max_allowed, ( + f"Cookie {c['name']} expires {c['expires'] - _FIXED_NOW}s " + f"(> 400d cap) — would be silently dropped" + ) + + +# =========================================================================== +# 6. Playwright add_cookies field requirements +# =========================================================================== + +def test_all_cookies_have_required_playwright_fields(): + cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + for c in cookies: + assert c.get("name"), f"missing name: {c}" + assert c.get("value") is not None, f"missing value: {c}" + assert c.get("domain"), f"missing domain: {c}" + assert c.get("path") == "/", f"path != / for {c['name']}" + + +def test_modern_cookies_marked_secure(): + """Cookies with sameSite=None require secure=True under Firefox/Chrome. + Also generally needed for cookies set via Playwright add_cookies without + a navigation context.""" + cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + for c in cookies: + if c.get("sameSite") == "None": + assert c.get("secure") is True, f"{c['name']} None+!secure invalid" + + +def test_httponly_on_signed_cookies(): + cookies = build_cookies(seed=42, now=_FIXED_NOW) + nid = next(c for c in cookies if c["name"] == "NID") + enid = next(c for c in cookies if c["name"] == "ENID") + assert nid.get("httpOnly") is True + assert enid.get("httpOnly") is True + + +# =========================================================================== +# 7. End-to-end with real fpforge Profile +# =========================================================================== + +def test_with_real_fpforge_profile(): + """End-to-end: generate a real Profile, ensure browsing_history is populated + and build_cookies works against it.""" + from invisible_playwright._fpforge import generate_profile + prof = generate_profile(seed=42) + assert isinstance(prof.browsing_history, list) + # The Bayesian network samples ~15-30 sites per persona + assert 5 <= len(prof.browsing_history) <= 50, \ + f"unexpected history length: {len(prof.browsing_history)}" + # Each entry has the expected fields + for site in prof.browsing_history: + assert "name" in site and "category" in site and "cookie_profile" in site + # build_cookies works against the real profile + cookies = build_cookies(seed=prof.seed, browsing_history=prof.browsing_history, + now=_FIXED_NOW) + # 6 google + at least 1 cookie per visited site + assert len(cookies) >= 6 + len(prof.browsing_history) + + +def test_same_seed_same_browsing_history_via_fpforge(): + """Profile.browsing_history is deterministic from seed (Bayesian sampler).""" + from invisible_playwright._fpforge import generate_profile + a = generate_profile(seed=42).browsing_history + b = generate_profile(seed=42).browsing_history + assert a == b + + +# =========================================================================== +# 8. Realism improvements (2026-05-24 round 2) +# =========================================================================== + +def test_no_1p_jar_cookie(): + """1P_JAR was deprecated by Google in 2022. Including it is an + anachronism flag for fingerprinters that look at cookie freshness.""" + cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) + names = {c["name"] for c in cookies} + assert "1P_JAR" not in names + + +def test_nid_prefix_broadened_range(): + """NID 3-digit prefix should cover historical versions (137/105/511/525 + seen in real captures) — range 100-540, not just 500-540.""" + seen_prefixes = set() + for seed in range(200): + cookies = build_cookies(seed=seed, now=_FIXED_NOW) + nid = next(c for c in cookies if c["name"] == "NID") + prefix = int(nid["value"].split("=", 1)[0]) + seen_prefixes.add(prefix) + assert min(seen_prefixes) < 500, f"NID range never goes below 500 ({sorted(seen_prefixes)[:5]})" + assert max(seen_prefixes) <= 540 + + +def test_consent_lang_from_timezone_eu(): + """CONSENT cookie's `lang+region` token derived from IANA timezone.""" + cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Rome") + consent = next(c for c in cookies if c["name"] == "CONSENT") + assert ".it+IT+" in consent["value"], f"expected it+IT in: {consent['value']}" + + +def test_consent_lang_default_fx(): + """Unknown / US timezone → default `en+FX` (non-EU fallback).""" + cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="America/New_York") + consent = next(c for c in cookies if c["name"] == "CONSENT") + assert ".en+FX+" in consent["value"] + + +def test_consent_lang_de_for_berlin(): + cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Berlin") + consent = next(c for c in cookies if c["name"] == "CONSENT") + assert ".de+DE+" in consent["value"] + + +def test_consent_lang_no_timezone_default(): + """timezone=None → default en+FX.""" + cookies = build_cookies(seed=42, now=_FIXED_NOW) + consent = next(c for c in cookies if c["name"] == "CONSENT") + assert ".en+FX+" in consent["value"] + + +def test_new_helper_cookies_appear_in_ga_consent_clarity(): + """ga_consent_clarity recipe should sometimes include _fbp, _dc_gtm_*, __hssrc + (probabilistic per rng). Check across many seeds that they appear.""" + saw_fbp = False + saw_gtm = False + saw_hssrc = False + history = [{"name": "site.com", "cookie_profile": "ga_consent_clarity"}] + for seed in range(100): + cookies = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW) + names = {c["name"] for c in cookies if c["domain"] == ".site.com"} + if "_fbp" in names: saw_fbp = True + if any(n.startswith("_dc_gtm_") for n in names): saw_gtm = True + if "__hssrc" in names: saw_hssrc = True + assert saw_fbp, "_fbp never appeared in 100 seeds (rng pick broken)" + assert saw_gtm, "_dc_gtm_* never appeared in 100 seeds" + assert saw_hssrc, "__hssrc never appeared in 100 seeds" + + +def test_fbp_format(): + """_fbp format: fb...""" + history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}] + # Try multiple seeds until we hit a seed that includes _fbp (50% chance) + for seed in range(20): + cookies = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW) + fbp = next((c for c in cookies if c["name"] == "_fbp"), None) + if fbp: + parts = fbp["value"].split(".") + assert parts[0] == "fb" + assert parts[1].isdigit() + assert parts[2].isdigit() and len(parts[2]) >= 13 # unix ms + assert parts[3].isdigit() + return + raise AssertionError("never got _fbp across 20 seeds — distribution broken") diff --git a/tests/test_service_worker.py b/tests/test_service_worker.py new file mode 100644 index 0000000..d077c99 --- /dev/null +++ b/tests/test_service_worker.py @@ -0,0 +1,226 @@ +"""Service worker interception regression tests — issue #18 root cause. + +The bug: `juggler/content/NetworkObserver.js:channelIntercepted` called +`interceptedChannel.interceptAfterServiceWorkerResets()` — an IDL method +that upstream Playwright adds via a C++ patch (InterceptedHttpChannel.cpp ++ nsINetworkInterceptController.idl). Our fork was missing those patches +until firefox-6, so the call threw TypeError → C++ NetworkObserver was +left in an inconsistent state → content process disposal manifested as +"page crash" on sites whose service workers fall through to the network +(e.g., id.sky.com). + +These tests inline-serve a service worker via data: URLs / blob URLs +where possible — no external network required. They assert the page +stays alive across SW registration + fetch lifecycle. + +Run: + pytest tests/test_service_worker.py -m e2e -v + +For dev iteration: + INVPW_BINARY_PATH=/path/to/firefox.exe pytest tests/test_service_worker.py -m e2e -v +""" +from __future__ import annotations + +import http.server +import socketserver +import threading + +import pytest + +from invisible_playwright import InvisiblePlaywright + + +# --------------------------------------------------------------------------- +# Local HTTP fixture server — service workers need a real http(s) origin +# (data: and about:blank are opaque-origin, no SW registration possible). +# --------------------------------------------------------------------------- + + +class _SWFixtureHandler(http.server.BaseHTTPRequestHandler): + """Serves a tiny set of routes for SW lifecycle testing.""" + + PAGES = { + "/": (200, "text/html", b""" +sw-host + + + +"""), + "/sw.js": (200, "application/javascript", b""" +self.addEventListener('install', e => self.skipWaiting()); +self.addEventListener('activate', e => e.waitUntil(clients.claim())); +self.addEventListener('fetch', e => { + if (e.request.url.endsWith('/from-sw')) { + e.respondWith(new Response('hello from SW', { + headers: {'content-type': 'text/plain'}, + })); + } + // Fall through for everything else - exercises the + // interceptAfterServiceWorkerResets path that was broken pre-firefox-6. +}); +"""), + "/from-sw": (200, "text/plain", b"network-fallback"), + "/from-network": (200, "text/plain", b"net-only"), + } + + def do_GET(self): + path = self.path.split("?", 1)[0] + if path in self.PAGES: + status, ctype, body = self.PAGES[path] + self.send_response(status) + self.send_header("Content-Type", ctype) + self.send_header("Content-Length", str(len(body))) + # SW requires HTTPS or localhost — we're on localhost so plain http is fine + self.send_header("Service-Worker-Allowed", "/") + self.end_headers() + self.wfile.write(body) + else: + self.send_response(404) + self.end_headers() + + def log_message(self, *args, **kwargs): + pass # silence stdout + + +@pytest.fixture(scope="module") +def fixture_server(): + """Spin up a localhost HTTP server with SW-friendly headers. Yields + the base URL (e.g., 'http://127.0.0.1:54321').""" + httpd = socketserver.TCPServer(("127.0.0.1", 0), _SWFixtureHandler) + port = httpd.server_address[1] + thread = threading.Thread(target=httpd.serve_forever, daemon=True) + thread.start() + try: + yield f"http://127.0.0.1:{port}" + finally: + httpd.shutdown() + httpd.server_close() + + +@pytest.fixture(scope="module") +def page(firefox_binary): + with InvisiblePlaywright( + seed=42, + binary_path=firefox_binary, + headless=True, + ) as browser: + ctx = browser.new_context() + p = ctx.new_page() + yield p + + +# --------------------------------------------------------------------------- +# Regression tests +# --------------------------------------------------------------------------- + + +@pytest.mark.e2e +def test_service_worker_registration_does_not_crash_page(page, fixture_server): + """Navigate to a page that registers a SW. The page must survive the + registration. Pre-firefox-6 this crashed if the SW path hit the missing + `interceptAfterServiceWorkerResets()` IDL method.""" + crashed = {"v": False} + page.on("crash", lambda p: crashed.__setitem__("v", True)) + + page.goto(f"{fixture_server}/", timeout=15_000) + # Wait for SW to register (or fail cleanly) + page.wait_for_function( + "window.__swState !== 'loading'", timeout=10_000 + ) + state = page.evaluate("window.__swState") + assert not crashed["v"], f"page crashed during SW registration (state={state!r})" + # state should be 'registered' or 'failed:...' (Firefox supports SW) + assert state in ("registered",) or state.startswith("failed:"), ( + f"unexpected SW state: {state!r}" + ) + + +@pytest.mark.e2e +def test_page_with_sw_can_navigate_repeatedly(page, fixture_server): + """Once a SW is registered, repeated navigations exercise the + interception path on every request. Pre-firefox-6, this hit the C++ + crash after a few cycles.""" + crashed = {"v": False} + page.on("crash", lambda p: crashed.__setitem__("v", True)) + + page.goto(f"{fixture_server}/", timeout=15_000) + page.wait_for_function("window.__swState !== 'loading'", timeout=10_000) + + # 5 reloads — the SW fetch handler runs each time + for _ in range(5): + page.reload(timeout=15_000) + assert not crashed["v"] + assert page.evaluate("document.title") == "sw-host" + + +@pytest.mark.e2e +def test_fetch_through_sw_returns_sw_synthesized_response(page, fixture_server): + """The SW intercepts `/from-sw` and synthesizes a response without + hitting the network. Verifies the SW fetch path is functional — this + is the exact flow that crashed in id.sky.com.""" + page.goto(f"{fixture_server}/", timeout=15_000) + page.wait_for_function("window.__swState === 'registered'", timeout=10_000) + + # First request to /from-sw routes through the SW + body = page.evaluate("""async (base) => { + const r = await fetch(base + '/from-sw'); + return await r.text(); + }""", fixture_server) + # Either the SW served 'hello from SW' (intercepted) or the network + # served 'network-fallback' (if SW didn't claim yet). Both are OK — + # the regression we test is that it doesn't CRASH. + assert body in ("hello from SW", "network-fallback"), ( + f"unexpected /from-sw response body: {body!r}" + ) + + +@pytest.mark.e2e +def test_sw_fall_through_to_network_does_not_crash(page, fixture_server): + """Request a URL the SW doesn't handle → falls through to network. + This is the `interceptAfterServiceWorkerResets()` code path: the SW + decides not to handle, the channel goes back to network. Without the + C++ patch, this is where the C++ side ended up in an inconsistent + state.""" + crashed = {"v": False} + page.on("crash", lambda p: crashed.__setitem__("v", True)) + + page.goto(f"{fixture_server}/", timeout=15_000) + page.wait_for_function("window.__swState === 'registered'", timeout=10_000) + + # /from-network is NOT intercepted by SW — exercises the fall-through + body = page.evaluate("""async (base) => { + const r = await fetch(base + '/from-network'); + return await r.text(); + }""", fixture_server) + assert body == "net-only" + assert not crashed["v"] + + +@pytest.mark.e2e +def test_sw_unregister_then_register_again(page, fixture_server): + """Unregistering then re-registering exercises lifecycle bookkeeping + in the C++ InterceptedHttpChannel state machine.""" + crashed = {"v": False} + page.on("crash", lambda p: crashed.__setitem__("v", True)) + + page.goto(f"{fixture_server}/", timeout=15_000) + page.wait_for_function("window.__swState === 'registered'", timeout=10_000) + + # Unregister all SWs then register again + result = page.evaluate("""async () => { + const regs = await navigator.serviceWorker.getRegistrations(); + for (const r of regs) await r.unregister(); + const r2 = await navigator.serviceWorker.register('/sw.js'); + return r2.scope; + }""") + assert "/" in result + assert not crashed["v"] diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 0000000..7702f7f --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,103 @@ +"""Regression tests for issue #24: CLI version reporting. + +Two distinct symptoms reported by `i43-j`: + 1. `python -m invisible_playwright --version` errored out (only the + `version` subcommand worked). + 2. `python -m invisible_playwright version` printed the literal string + "0.1.0" regardless of the installed version (a stale hardcoded + `__version__` in __init__.py that nobody had remembered to bump). + +These tests pin down both behaviours so the regressions don't sneak back +in via a future copy/paste. +""" +import io +import re +import subprocess +import sys +from contextlib import redirect_stdout + +import pytest + +import invisible_playwright +from invisible_playwright import __version__, cli + + +pytestmark = pytest.mark.unit + + +def test_version_matches_installed_package_metadata(): + """__version__ must come from importlib.metadata, not a hardcoded literal, + so it can never drift from the pyproject.toml `version` field.""" + from importlib.metadata import version as pkg_version + assert __version__ == pkg_version("invisible-playwright") + + +def test_version_is_not_the_stale_010_string(): + """Issue #24 regression: __version__ used to be hardcoded as '0.1.0' + and never updated. If this ever returns to a literal '0.1.0' the + package has been published or shipped with stale metadata.""" + assert __version__ != "0.1.0", ( + "__version__ is the stale hardcoded '0.1.0' string — issue #24 has " + "regressed. Use importlib.metadata to derive it from pyproject.toml." + ) + + +def test_version_subcommand_prints_real_version(): + """`invisible-playwright version` must print the actual installed version, + not the old hardcoded '0.1.0'.""" + buf = io.StringIO() + with redirect_stdout(buf): + rc = cli.main(["version"]) + assert rc == 0 + out = buf.getvalue() + assert f"invisible_playwright {__version__}" in out + assert "0.1.0" not in out or __version__ == "0.1.0" # safety: only allowed if truly 0.1.0 + assert "BINARY_VERSION=" in out + assert "Firefox " in out + + +def test_dash_dash_version_flag_works(): + """Issue #24 reporter: `python -m invisible_playwright --version` used to + error with 'the following arguments are required: cmd' because there was + no top-level --version flag, only the `version` subcommand. Now the + Python convention works too.""" + # argparse's --version action calls sys.exit(0) directly, so use subprocess. + r = subprocess.run( + [sys.executable, "-m", "invisible_playwright", "--version"], + capture_output=True, text=True, timeout=15, + ) + assert r.returncode == 0, f"--version returned {r.returncode}, stderr={r.stderr!r}" + # argparse may emit on stdout or stderr depending on version + combined = r.stdout + r.stderr + assert "invisible_playwright" in combined + assert __version__ in combined + + +def test_no_args_prints_help_not_traceback(): + """`python -m invisible_playwright` with no args should be graceful + (print help, exit non-zero) rather than crashing with a traceback.""" + r = subprocess.run( + [sys.executable, "-m", "invisible_playwright"], + capture_output=True, text=True, timeout=15, + ) + # Either prints help (rc=2) or shows usage. Must NOT contain a traceback. + assert "Traceback" not in (r.stdout + r.stderr) + assert "usage:" in (r.stdout + r.stderr).lower() + + +def test_dash_V_short_flag_works(): + """Alias `-V` for `--version` (Python convention).""" + r = subprocess.run( + [sys.executable, "-m", "invisible_playwright", "-V"], + capture_output=True, text=True, timeout=15, + ) + assert r.returncode == 0 + assert __version__ in (r.stdout + r.stderr) + + +def test_version_matches_semver_shape(): + """Sanity: version should look like a semver (digits.digits.digits) + or a PEP-440 dev marker, not a placeholder string.""" + assert re.match(r"^\d+\.\d+\.\d+", __version__), ( + f"__version__ {__version__!r} doesn't look like a real version" + ) diff --git a/tests/test_webrtc_realness.py b/tests/test_webrtc_realness.py new file mode 100644 index 0000000..afa0736 --- /dev/null +++ b/tests/test_webrtc_realness.py @@ -0,0 +1,453 @@ +"""WebRTC realness regression tests. + +Two layers, both runnable on GitHub CI: + +* **unit** (`@pytest.mark.unit`) — pure SDP/candidate assertions against golden + samples. No browser, no proxy, no network. These lock in every rule we found + on 2026-06-06: host must be mDNS ``.local``; the synthetic srflx must carry the + egress IP with a GENUINE nICEr priority (never ``local_pref == 0xFFFF``) and a + stable, distinct foundation; CreepJS's resolver must return the egress, and a + host-only SDP must read as "blocked". They run in the standard ``tests.yml``. + +* **e2e** (`@pytest.mark.e2e`) — launch the patched binary and verify the live + ICE gather. "Being behind a proxy" is faked WITHOUT smartproxy: + - the egress IP is injected via ``STEALTHFOX_WEBRTC_PUBLIC_IP`` (RFC 5737 + TEST-NET, so it never collides with a real IP); + - the "behind a TCP-only SOCKS proxy" condition is reproduced by a tiny + in-process SOCKS5 server that relays TCP CONNECT but refuses UDP ASSOCIATE + (exactly a residential TCP-only proxy → WebRTC's default-route UDP probe + fails → exercises the Fix C fallback). No credentials, no external proxy. + Excluded from the default run; a binary is located via ``STEALTHFOX_E2E_BINARY`` + (or the locally-built tree), else the test skips. +""" +from __future__ import annotations + +import os +import re +import select +import socket +import struct +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + +import pytest + +# ────────────────────────────────────────────────────────────────────────── +# Pure SDP / ICE-candidate helpers (no I/O) — the heart of the sentinels. +# ────────────────────────────────────────────────────────────────────────── +_CAND = re.compile( + r"candidate:(?P\S+)\s+(?P\d+)\s+(?PUDP|TCP|udp|tcp)\s+" + r"(?P\d+)\s+(?P
\S+)\s+(?P\d+)\s+typ\s+(?P\w+)" + r"(?:.*?raddr\s+(?P\S+)\s+rport\s+(?P\d+))?" +) + + +def parse_candidate(line): + """Parse one ``a=candidate:`` / ``candidate:`` line into a dict (or None).""" + m = _CAND.search(line) + if not m: + return None + d = m.groupdict() + d["component"] = int(d["component"]) + d["priority"] = int(d["priority"]) + d["port"] = int(d["port"]) + d["proto"] = d["proto"].upper() + if d["rport"] is not None: + d["rport"] = int(d["rport"]) + return d + + +def decode_priority(prio): + """Split a candidate priority into nICEr's fields (RFC 5245 layout that + nICEr emits: type<<24 | iface<<16 | dir<<13 | stun<<8 | (256-component)).""" + return { + "type_pref": (prio >> 24) & 0xFF, + "iface_pref": (prio >> 16) & 0xFF, + "local_pref": (prio >> 8) & 0xFFFF, + "direction": (prio >> 13) & 0x7, + "stun_priority": (prio >> 8) & 0x1F, + "component": 256 - (prio & 0xFF), + } + + +def is_mdns(addr): + return bool(addr) and str(addr).endswith(".local") + + +def candidates(sdp_or_lines): + if isinstance(sdp_or_lines, str): + lines = re.findall(r"(?:a=)?candidate:[^\r\n]*", sdp_or_lines) + else: + lines = list(sdp_or_lines) + return [c for c in (parse_candidate(l) for l in lines) if c] + + +def host_candidates(cands): + return [c for c in cands if c["typ"] == "host"] + + +def srflx_candidates(cands): + return [c for c in cands if c["typ"] == "srflx"] + + +def host_is_mdns(cands): + """Every host candidate must be a ``.local`` mDNS name, never a raw + LAN IP (the §9.4 leak form that fails BrowserLeaks).""" + hosts = host_candidates(cands) + return bool(hosts) and all(is_mdns(c["address"]) for c in hosts) + + +def srflx_realness(cand, expected_ip=None): + """Return (ok, reasons) for whether ``cand`` looks like a GENUINE nICEr UDP + server-reflexive candidate. Encodes the 2026-06-06 findings.""" + reasons = [] + if cand["typ"] != "srflx": + reasons.append("not a srflx candidate") + return False, reasons + if expected_ip is not None and cand["address"] != expected_ip: + reasons.append(f"address {cand['address']} != expected {expected_ip}") + p = decode_priority(cand["priority"]) + if p["type_pref"] != 100: + reasons.append(f"type_pref {p['type_pref']} != 100 (SRV_RFLX)") + if p["local_pref"] == 0xFFFF: + reasons.append("local_pref == 0xFFFF — impossible nICEr value (the old hardcoded tell)") + elif not (0x7000 <= p["local_pref"] < 0x8000): + reasons.append(f"local_pref {p['local_pref']} outside the genuine ~0x7E00-0x7FFF band") + if not (16 <= p["stun_priority"] <= 31): + reasons.append(f"stun_priority {p['stun_priority']} implausible (expect 31-server_id)") + if cand.get("raddr") not in (None, "0.0.0.0"): + reasons.append(f"raddr {cand['raddr']} not redacted to 0.0.0.0") + return (not reasons), reasons + + +def creep_get_ipaddress(sdp): + """Faithful port of CreepJS's getIPAddress(sdp): connection line first, then + the first candidate IP; '0.0.0.0' counts as blocked. Returns None if blocked + — i.e. exactly what makes CreepJS render 'stun connection: blocked'.""" + blocked = "0.0.0.0" + conn = (re.findall(r"c=IN\s.+\s", sdp) or [""])[0].strip().split(" ") + conn_ip = conn[2] if len(conn) > 2 else "" + if conn_ip and conn_ip != blocked: + return conn_ip + m = re.search(r"(udp|tcp)\s(?:\d|\w)+\s((?:\d|\w|\.|:)+)(?=\s)", sdp, re.I) + ip = m.group(2) if m else None + return ip if (ip and ip != blocked) else None + + +# ────────────────────────────────────────────────────────────────────────── +# Golden samples — real priority/foundation values, TEST-NET IPs (RFC 5737) +# so no real address is ever committed (feedback_pre_push_privacy_check). +# ────────────────────────────────────────────────────────────────────────── +HOST_MDNS = "candidate:0 1 UDP 2122252543 1460e928-16b3-4c66-80ad-04abcdef0000.local 54551 typ host" +HOST_RAW_IP = "candidate:0 1 UDP 2122252543 192.168.1.20 54551 typ host" # §9.4 leak form +VANILLA_SRFLX = "candidate:1 1 UDP 1685987327 203.0.113.50 3755 typ srflx raddr 0.0.0.0 rport 0" +OURS_SRFLX = "candidate:1 1 UDP 1686052863 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0" +# Pre-fix injection: local_pref hardcoded to 0xFFFF (priority 1694498815). The tell. +OLD_BAD_SRFLX = "candidate:2 1 UDP 1694498815 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0" + +SDP_GOOD = ( + "v=0\r\nc=IN IP4 0.0.0.0\r\n" + f"a={HOST_MDNS}\r\na={OURS_SRFLX}\r\n" +) +SDP_BLOCKED = "v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n" # host-only, no srflx + + +# ────────────────────────────────────────────────────────────────────────── +# UNIT sentinels (run on GitHub CI) +# ────────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_parse_and_decode_basics(): + c = parse_candidate(OURS_SRFLX) + assert c["typ"] == "srflx" and c["proto"] == "UDP" + assert c["address"] == "203.0.113.7" and c["raddr"] == "0.0.0.0" and c["rport"] == 0 + p = decode_priority(c["priority"]) + assert p["type_pref"] == 100 and p["stun_priority"] == 31 and p["component"] == 1 + + +@pytest.mark.unit +def test_genuine_srflx_passes(): + for line in (VANILLA_SRFLX, OURS_SRFLX): + ok, reasons = srflx_realness(parse_candidate(line), expected_ip=parse_candidate(line)["address"]) + assert ok, reasons + + +@pytest.mark.unit +def test_old_0xffff_srflx_is_rejected(): + """Fix A sentinel: local_pref == 0xFFFF must be flagged as fake.""" + ok, reasons = srflx_realness(parse_candidate(OLD_BAD_SRFLX)) + assert not ok + assert any("0xFFFF" in r for r in reasons), reasons + + +@pytest.mark.unit +def test_host_must_be_mdns_not_raw_ip(): + """§9.4 sentinel: raw-IP host candidate is a leak; .local is required.""" + assert host_is_mdns(candidates([HOST_MDNS])) is True + assert host_is_mdns(candidates([HOST_RAW_IP])) is False + + +@pytest.mark.unit +def test_srflx_foundation_distinct_from_host(): + """Fix B sentinel: srflx foundation must differ from the host foundations.""" + cands = candidates([HOST_MDNS, OURS_SRFLX]) + host_fnds = {c["foundation"] for c in host_candidates(cands)} + srflx_fnds = {c["foundation"] for c in srflx_candidates(cands)} + assert srflx_fnds and srflx_fnds.isdisjoint(host_fnds) + + +@pytest.mark.unit +def test_creep_resolver_returns_egress_when_srflx_present(): + assert creep_get_ipaddress(SDP_GOOD) == "203.0.113.7" + + +@pytest.mark.unit +def test_creep_resolver_reports_blocked_for_host_only(): + """The exact false-green we shipped: host-only (.local) SDP → no public IP + → CreepJS shows 'blocked'. The resolver must return None here.""" + assert creep_get_ipaddress(SDP_BLOCKED) is None + + +@pytest.mark.unit +def test_mdns_host_is_invisible_to_creep_resolver(): + """A .local host must NOT be mis-read as an IP (the hyphen in the UUID is + what makes CreepJS skip it and fall through to the srflx).""" + assert creep_get_ipaddress("v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n") is None + + +# ────────────────────────────────────────────────────────────────────────── +# Fake-proxy infrastructure for e2e: a tiny TCP-only SOCKS5 server. +# ────────────────────────────────────────────────────────────────────────── +class _Socks5TcpOnly: + """Minimal SOCKS5: no-auth, CONNECT (TCP) relayed, UDP ASSOCIATE refused. + + Reproduces a residential TCP-only proxy: pages load over TCP, but WebRTC's + UDP path is dead — which (for a no-camera page in default_address_only mode) + is exactly what made the default-route probe fail and ICE return zero + candidates before Fix C. + """ + + def __init__(self): + self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self._srv.bind(("127.0.0.1", 0)) + self._srv.listen(16) + self.port = self._srv.getsockname()[1] + self.udp_associate_attempts = 0 + self._stop = False + self._t = threading.Thread(target=self._serve, daemon=True) + self._t.start() + + def _serve(self): + while not self._stop: + try: + conn, _ = self._srv.accept() + except OSError: + break + threading.Thread(target=self._handle, args=(conn,), daemon=True).start() + + def _recv_exact(self, sock, n): + buf = b"" + while len(buf) < n: + chunk = sock.recv(n - len(buf)) + if not chunk: + return None + buf += chunk + return buf + + def _handle(self, conn): + try: + head = self._recv_exact(conn, 2) + if not head or head[0] != 0x05: + conn.close() + return + nmethods = head[1] + self._recv_exact(conn, nmethods) + conn.sendall(b"\x05\x00") # no-auth + req = self._recv_exact(conn, 4) + if not req: + conn.close() + return + ver, cmd, _, atyp = req + if atyp == 0x01: + addr = socket.inet_ntoa(self._recv_exact(conn, 4)) + elif atyp == 0x03: + ln = self._recv_exact(conn, 1)[0] + addr = self._recv_exact(conn, ln).decode("ascii", "ignore") + elif atyp == 0x04: + addr = socket.inet_ntop(socket.AF_INET6, self._recv_exact(conn, 16)) + else: + conn.close() + return + port = struct.unpack("!H", self._recv_exact(conn, 2))[0] + if cmd != 0x01: # not CONNECT (e.g. UDP ASSOCIATE) → refuse + self.udp_associate_attempts += 1 + conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00") # cmd not supported + conn.close() + return + try: + upstream = socket.create_connection((addr, port), timeout=15) + except OSError: + conn.sendall(b"\x05\x04\x00\x01\x00\x00\x00\x00\x00\x00") # host unreachable + conn.close() + return + conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00") # success + self._relay(conn, upstream) + except Exception: + try: + conn.close() + except Exception: + pass + + def _relay(self, a, b): + try: + while True: + r, _, _ = select.select([a, b], [], [], 30) + if not r: + break + for s in r: + data = s.recv(65536) + if not data: + return + (b if s is a else a).sendall(data) + finally: + for s in (a, b): + try: + s.close() + except Exception: + pass + + def close(self): + self._stop = True + try: + self._srv.close() + except Exception: + pass + + +# Same per-event probe CreepJS runs (kept tiny; raw string = one escape level). +_PROBE_JS = r"""async () => { + const pc = new RTCPeerConnection({iceCandidatePoolSize:1, iceServers:[{urls:[ + 'stun:stun4.l.google.com:19302','stun:stun3.l.google.com:19302']}]}); + pc.createDataChannel(''); + const cands = []; + pc.addEventListener('icecandidate', e => { if (e.candidate && e.candidate.candidate) cands.push(e.candidate.candidate); }); + await pc.setLocalDescription(await pc.createOffer({offerToReceiveAudio:1, offerToReceiveVideo:1})); + await new Promise(r => setTimeout(r, 3500)); + const sdp = (pc.localDescription && pc.localDescription.sdp) || ''; + try { pc.close(); } catch(e) {} + return { candidates: cands, sdp }; +}""" + +_FAKE_EGRESS = "203.0.113.7" # RFC 5737 TEST-NET-3 + + +def _e2e_binary(): + # Honor both env vars so the whole e2e suite targets one binary from a single + # setting (INVPW_BINARY_PATH is what conftest's firefox_binary uses). + cand = os.environ.get("STEALTHFOX_E2E_BINARY") or os.environ.get("INVPW_BINARY_PATH") + if cand and os.path.exists(cand): + return cand + built = r"C:\ff\source\obj-x86_64-pc-windows-msvc\dist\bin\firefox.exe" + if os.path.exists(built): + return built + return None + + +@pytest.fixture +def socks5_tcp_only(): + srv = _Socks5TcpOnly() + yield srv + srv.close() + + +@pytest.fixture +def local_https_page(): + """A trivial localhost page (used by the no-proxy srflx test).""" + class H(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header("Content-Type", "text/html") + self.end_headers() + self.wfile.write(b"wrtc") + + def log_message(self, *a): + pass + + httpd = HTTPServer(("127.0.0.1", 0), H) + threading.Thread(target=httpd.serve_forever, daemon=True).start() + yield f"http://127.0.0.1:{httpd.server_address[1]}/" + httpd.shutdown() + + +def _launch(**extra): + from invisible_playwright import InvisiblePlaywright + + kw = {"headless": True, + # Fixed zone so the wrapper does NOT run timezone="auto" egress + # discovery through the (fake) proxy — irrelevant here, we inject the + # egress IP directly and want the launch deterministic/offline. + "timezone": "America/New_York", + "extra_prefs": {"media.peerconnection.ice.obfuscate_host_addresses": True}} + kw.update(extra) + return InvisiblePlaywright(**kw) + + +@pytest.mark.e2e +def test_srflx_is_real_and_resolvable(local_https_page): + """No proxy needed: the egress is faked via the env. Asserts the live srflx + is genuine (Fix A/B) and that CreepJS's resolver returns it (not blocked).""" + binary = _e2e_binary() + if not binary: + pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)") + os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _FAKE_EGRESS + os.environ["STEALTHFOX_WEBRTC_DISABLE_IPV6"] = "1" + with _launch(binary_path=binary) as browser: + page = browser.new_context().new_page() + page.goto(local_https_page, wait_until="domcontentloaded", timeout=60000) + res = page.evaluate(_PROBE_JS) + cands = candidates(res["candidates"]) + assert cands, "ICE produced ZERO candidates (blocked)" + assert host_is_mdns(cands), [c["address"] for c in host_candidates(cands)] + srflx = [c for c in srflx_candidates(cands) if c["address"] == _FAKE_EGRESS] + assert srflx, f"no synthetic srflx with {_FAKE_EGRESS}: {res['candidates']}" + ok, reasons = srflx_realness(srflx[0], expected_ip=_FAKE_EGRESS) + assert ok, reasons + # Two srflx for the same base must share ONE stable foundation (Fix B). + assert len({c["foundation"] for c in srflx}) == 1 + assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS + + +@pytest.mark.e2e +def test_not_blocked_behind_tcp_only_socks(socks5_tcp_only): + """Fix C sentinel: behind a TCP-only SOCKS proxy on a remote origin, ICE + must still complete (host .local + synthetic srflx), not return zero + candidates. Without Fix C this page is fully 'blocked'.""" + binary = _e2e_binary() + if not binary: + pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)") + os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _FAKE_EGRESS + os.environ["STEALTHFOX_WEBRTC_DISABLE_IPV6"] = "1" + proxy = {"server": f"socks5://127.0.0.1:{socks5_tcp_only.port}"} + try: + with _launch(binary_path=binary, proxy=proxy) as browser: + page = browser.new_context().new_page() + # remote origin loaded THROUGH the local SOCKS proxy (not localhost, + # so no proxy-bypass) → WebRTC proxy config active → Fix C path. + page.goto("https://example.com/", wait_until="domcontentloaded", timeout=70000) + res = page.evaluate(_PROBE_JS) + except Exception as exc: # network/proxy unavailable in this environment + pytest.skip(f"proxy/network path unavailable: {exc!r}") + cands = candidates(res["candidates"]) + # Hard regression check: ZERO candidates means WebRTC is fully blocked behind + # the SOCKS proxy — that's the Fix C regression this sentinel exists to catch. + assert cands, "behind SOCKS the gather returned ZERO candidates — Fix C regressed (blocked)" + assert host_is_mdns(cands) + # The synthetic srflx (= fake egress) needs the remote origin to load FULLY + # through the proxy so the WebRTC proxy config engages. That path is + # environment-sensitive (it doesn't always engage on a datacenter CI box even + # though host candidates gather), so treat a missing srflx as a skip, not a + # failure — the local run validates it where the path is real. + if not any(c["address"] == _FAKE_EGRESS for c in srflx_candidates(cands)): + pytest.skip("synthetic srflx not engaged in this environment " + "(needs the remote origin fully through the proxy); validated locally") + assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS diff --git a/tests/unit/test_config_public.py b/tests/unit/test_config_public.py new file mode 100644 index 0000000..0e26e36 --- /dev/null +++ b/tests/unit/test_config_public.py @@ -0,0 +1,125 @@ +"""Unit tests for the public ``config`` helpers.""" + +import pytest + +from invisible_playwright import ( + ensure_binary, + get_default_args, + get_default_stealth_prefs, +) +from invisible_playwright.config import get_default_stealth_prefs as _direct + + +pytestmark = pytest.mark.unit + + +def test_get_default_args_is_empty_list(): + """Currently no baseline CLI args, but must return a list (mutable, fresh each call).""" + args = get_default_args() + assert args == [] + assert isinstance(args, list) + args.append("--foo") + # next call must return a fresh empty list, not the mutated one + assert get_default_args() == [] + + +def test_get_default_stealth_prefs_random_seed_returns_dict(): + """No seed -> fresh random fingerprint, dict has expected stealth keys.""" + prefs = get_default_stealth_prefs() + assert isinstance(prefs, dict) + assert len(prefs) > 0 + # humanize toggle is always set explicitly + assert "invisible_playwright.humanize" in prefs + assert prefs["invisible_playwright.humanize"] is True + + +def test_get_default_stealth_prefs_seed_is_deterministic(): + """Same seed -> byte-identical prefs across calls.""" + a = get_default_stealth_prefs(seed=42) + b = get_default_stealth_prefs(seed=42) + assert a == b + + +def test_get_default_stealth_prefs_different_seeds_differ(): + """Different seeds -> different prefs.""" + a = get_default_stealth_prefs(seed=1) + b = get_default_stealth_prefs(seed=2) + assert a != b + + +def test_humanize_false_disables_prefs(): + """humanize=False removes the maxTime knob and flips the toggle to False.""" + prefs = get_default_stealth_prefs(seed=42, humanize=False) + assert prefs["invisible_playwright.humanize"] is False + assert "invisible_playwright.humanize.maxTime" not in prefs + + +def test_humanize_default_sets_max_time_1_5(): + """humanize=True -> default maxTime is 1.5s, stored as string.""" + prefs = get_default_stealth_prefs(seed=42, humanize=True) + assert prefs["invisible_playwright.humanize"] is True + assert prefs["invisible_playwright.humanize.maxTime"] == "1.5" + + +def test_humanize_float_overrides_max_time(): + """Float for humanize is the explicit cap in seconds.""" + prefs = get_default_stealth_prefs(seed=42, humanize=3.0) + assert prefs["invisible_playwright.humanize"] is True + assert prefs["invisible_playwright.humanize.maxTime"] == "3.0" + + +def test_extra_prefs_overlay_takes_precedence(): + """extra_prefs overlay LAST overrides any baseline value.""" + prefs = get_default_stealth_prefs( + seed=42, extra_prefs={"some.custom.pref": 999} + ) + assert prefs["some.custom.pref"] == 999 + + +def test_extra_prefs_can_override_baseline(): + """A key in extra_prefs that also exists in baseline gets overridden.""" + baseline = get_default_stealth_prefs(seed=42) + a_baseline_key = next(iter(baseline.keys())) + overridden = get_default_stealth_prefs( + seed=42, extra_prefs={a_baseline_key: "OVERRIDDEN_SENTINEL"} + ) + assert overridden[a_baseline_key] == "OVERRIDDEN_SENTINEL" + + +def test_locale_argument_changes_prefs(): + """Different locales produce different prefs (Accept-Language affected).""" + en = get_default_stealth_prefs(seed=42, locale="en-US") + it = get_default_stealth_prefs(seed=42, locale="it-IT") + assert en != it + + +def test_timezone_argument_changes_prefs(): + """Different timezones produce different prefs.""" + ny = get_default_stealth_prefs(seed=42, timezone="America/New_York") + rome = get_default_stealth_prefs(seed=42, timezone="Europe/Rome") + assert ny != rome + + +def test_pin_argument_forces_specific_fields(): + """Pin forces a specific field while the rest stays seed-derived.""" + plain = get_default_stealth_prefs(seed=42) + pinned = get_default_stealth_prefs( + seed=42, pin={"hardware.concurrency": 999} + ) + # something in the dict must differ vs the plain seed=42 build + assert plain != pinned + + +def test_public_import_matches_direct_import(): + """Top-level re-export and direct module import return identical output.""" + a = get_default_stealth_prefs(seed=42) + b = _direct(seed=42) + assert a == b + + +def test_ensure_binary_is_callable_via_public_namespace(): + """ensure_binary is re-exported and stays callable from the package root.""" + # We don't invoke it (would trigger a network download in CI) — just + # verify the public attribute is the same callable as the underlying. + from invisible_playwright.download import ensure_binary as _direct_eb + assert ensure_binary is _direct_eb diff --git a/tests/vendor/README.md b/tests/vendor/README.md new file mode 100644 index 0000000..8b4ae4a --- /dev/null +++ b/tests/vendor/README.md @@ -0,0 +1,18 @@ +# Vendored detection libraries (test-only) + +These are upstream, unmodified, MIT-licensed browser-fingerprinting / bot-detection +libraries, vendored so the detector e2e tests run **hermetically and identically** +on a dev box and on a GitHub runner (no external CDN at test time — Firefox +tracking-protection blocks the openfpcdn.io CDN anyway, and we want CI offline). + +They are served from a localhost HTTP server and loaded into the patched Firefox; +the tests assert the REAL detectors don't flag the stealth build (BotD: `bot===false`) +and that the fingerprint is stable (FingerprintJS: same `visitorId` across launches). + +| File | Package | Version | Source | License | +|---|---|---|---|---| +| `botd-2.0.0.esm.js` | `@fingerprintjs/botd` | 2.0.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/botd@2.0.0/dist/botd.esm.js | MIT | +| `fingerprintjs-5.2.0.umd.min.js` | `@fingerprintjs/fingerprintjs` | 5.2.0 | https://cdn.jsdelivr.net/npm/@fingerprintjs/fingerprintjs@5.2.0/dist/fp.umd.min.js | MIT | + +Both are MIT (Copyright © FingerprintJS, Inc.). To update: download the pinned +dist from jsdelivr, drop it here, and bump the version in the filename + this table. diff --git a/tests/vendor/botd-2.0.0.esm.js b/tests/vendor/botd-2.0.0.esm.js new file mode 100644 index 0000000..3064a78 --- /dev/null +++ b/tests/vendor/botd-2.0.0.esm.js @@ -0,0 +1,811 @@ +/** + * Fingerprint BotD v2.0.0 - Copyright (c) FingerprintJS, Inc, 2025 (https://fingerprint.com) + * Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license. + */ + +var version = "2.0.0"; + +/** + * Enum for types of bots. + * Specific types of bots come first, followed by automation technologies. + * + * @readonly + * @enum {string} + */ +const BotKind = { + // Object is used instead of Typescript enum to avoid emitting IIFE which might be affected by further tree-shaking. + // See example of compiled enums https://stackoverflow.com/q/47363996) + Awesomium: 'awesomium', + Cef: 'cef', + CefSharp: 'cefsharp', + CoachJS: 'coachjs', + Electron: 'electron', + FMiner: 'fminer', + Geb: 'geb', + NightmareJS: 'nightmarejs', + Phantomas: 'phantomas', + PhantomJS: 'phantomjs', + Rhino: 'rhino', + Selenium: 'selenium', + Sequentum: 'sequentum', + SlimerJS: 'slimerjs', + WebDriverIO: 'webdriverio', + WebDriver: 'webdriver', + HeadlessChrome: 'headless_chrome', + Unknown: 'unknown', +}; +/** + * Bot detection error. + */ +class BotdError extends Error { + /** + * Creates a new BotdError. + * + * @class + */ + constructor(state, message) { + super(message); + this.state = state; + this.name = 'BotdError'; + Object.setPrototypeOf(this, BotdError.prototype); + } +} + +function detect(components, detectors) { + const detections = {}; + let finalDetection = { + bot: false, + }; + for (const detectorName in detectors) { + const detector = detectors[detectorName]; + const detectorRes = detector(components); + let detection = { bot: false }; + if (typeof detectorRes === 'string') { + detection = { bot: true, botKind: detectorRes }; + } + else if (detectorRes) { + detection = { bot: true, botKind: BotKind.Unknown }; + } + detections[detectorName] = detection; + if (detection.bot) { + finalDetection = detection; + } + } + return [detections, finalDetection]; +} +async function collect(sources) { + const components = {}; + const sourcesKeys = Object.keys(sources); + await Promise.all(sourcesKeys.map(async (sourceKey) => { + const res = sources[sourceKey]; + try { + components[sourceKey] = { + value: await res(), + state: 0 /* State.Success */, + }; + } + catch (error) { + if (error instanceof BotdError) { + components[sourceKey] = { + state: error.state, + error: `${error.name}: ${error.message}`, + }; + } + else { + components[sourceKey] = { + state: -3 /* State.UnexpectedBehaviour */, + error: error instanceof Error ? `${error.name}: ${error.message}` : String(error), + }; + } + } + })); + return components; +} + +function detectAppVersion({ appVersion }) { + if (appVersion.state !== 0 /* State.Success */) + return false; + if (/headless/i.test(appVersion.value)) + return BotKind.HeadlessChrome; + if (/electron/i.test(appVersion.value)) + return BotKind.Electron; + if (/slimerjs/i.test(appVersion.value)) + return BotKind.SlimerJS; +} + +function arrayIncludes(arr, value) { + return arr.indexOf(value) !== -1; +} +function strIncludes(str, value) { + return str.indexOf(value) !== -1; +} +function arrayFind(array, callback) { + if ('find' in array) + return array.find(callback); + for (let i = 0; i < array.length; i++) { + if (callback(array[i], i, array)) + return array[i]; + } + return undefined; +} + +function getObjectProps(obj) { + return Object.getOwnPropertyNames(obj); +} +function includes(arr, ...keys) { + for (const key of keys) { + if (typeof key === 'string') { + if (arrayIncludes(arr, key)) + return true; + } + else { + const match = arrayFind(arr, (value) => key.test(value)); + if (match != null) + return true; + } + } + return false; +} +function countTruthy(values) { + return values.reduce((sum, value) => sum + (value ? 1 : 0), 0); +} + +function detectDocumentAttributes({ documentElementKeys }) { + if (documentElementKeys.state !== 0 /* State.Success */) + return false; + if (includes(documentElementKeys.value, 'selenium', 'webdriver', 'driver')) { + return BotKind.Selenium; + } +} + +function detectErrorTrace({ errorTrace }) { + if (errorTrace.state !== 0 /* State.Success */) + return false; + if (/PhantomJS/i.test(errorTrace.value)) + return BotKind.PhantomJS; +} + +function detectEvalLengthInconsistency({ evalLength, browserKind, browserEngineKind, }) { + if (evalLength.state !== 0 /* State.Success */ || + browserKind.state !== 0 /* State.Success */ || + browserEngineKind.state !== 0 /* State.Success */) + return; + const length = evalLength.value; + if (browserEngineKind.value === "unknown" /* BrowserEngineKind.Unknown */) + return false; + return ((length === 37 && !arrayIncludes(["webkit" /* BrowserEngineKind.Webkit */, "gecko" /* BrowserEngineKind.Gecko */], browserEngineKind.value)) || + (length === 39 && !arrayIncludes(["internet_explorer" /* BrowserKind.IE */], browserKind.value)) || + (length === 33 && !arrayIncludes(["chromium" /* BrowserEngineKind.Chromium */], browserEngineKind.value))); +} + +function detectFunctionBind({ functionBind }) { + if (functionBind.state === -2 /* State.NotFunction */) + return BotKind.PhantomJS; +} + +function detectLanguagesLengthInconsistency({ languages }) { + if (languages.state === 0 /* State.Success */ && languages.value.length === 0) { + return BotKind.HeadlessChrome; + } +} + +function detectMimeTypesConsistent({ mimeTypesConsistent }) { + if (mimeTypesConsistent.state === 0 /* State.Success */ && !mimeTypesConsistent.value) { + return BotKind.Unknown; + } +} + +function detectNotificationPermissions({ notificationPermissions, browserKind, }) { + if (browserKind.state !== 0 /* State.Success */ || browserKind.value !== "chrome" /* BrowserKind.Chrome */) + return false; + if (notificationPermissions.state === 0 /* State.Success */ && notificationPermissions.value) { + return BotKind.HeadlessChrome; + } +} + +function detectPluginsArray({ pluginsArray }) { + if (pluginsArray.state === 0 /* State.Success */ && !pluginsArray.value) + return BotKind.HeadlessChrome; +} + +function detectPluginsLengthInconsistency({ pluginsLength, android, browserKind, browserEngineKind, }) { + if (pluginsLength.state !== 0 /* State.Success */ || + android.state !== 0 /* State.Success */ || + browserKind.state !== 0 /* State.Success */ || + browserEngineKind.state !== 0 /* State.Success */) + return; + if (browserKind.value !== "chrome" /* BrowserKind.Chrome */ || + android.value || + browserEngineKind.value !== "chromium" /* BrowserEngineKind.Chromium */) + return; + if (pluginsLength.value === 0) + return BotKind.HeadlessChrome; +} + +function detectProcess({ process }) { + var _a; + if (process.state !== 0 /* State.Success */) + return false; + if (process.value.type === 'renderer' || ((_a = process.value.versions) === null || _a === void 0 ? void 0 : _a.electron) != null) + return BotKind.Electron; +} + +function detectProductSub({ productSub, browserKind }) { + if (productSub.state !== 0 /* State.Success */ || browserKind.state !== 0 /* State.Success */) + return false; + if ((browserKind.value === "chrome" /* BrowserKind.Chrome */ || + browserKind.value === "safari" /* BrowserKind.Safari */ || + browserKind.value === "opera" /* BrowserKind.Opera */ || + browserKind.value === "wechat" /* BrowserKind.WeChat */) && + productSub.value !== '20030107') + return BotKind.Unknown; +} + +function detectUserAgent({ userAgent }) { + if (userAgent.state !== 0 /* State.Success */) + return false; + if (/PhantomJS/i.test(userAgent.value)) + return BotKind.PhantomJS; + if (/Headless/i.test(userAgent.value)) + return BotKind.HeadlessChrome; + if (/Electron/i.test(userAgent.value)) + return BotKind.Electron; + if (/slimerjs/i.test(userAgent.value)) + return BotKind.SlimerJS; +} + +function detectWebDriver({ webDriver }) { + if (webDriver.state === 0 /* State.Success */ && webDriver.value) + return BotKind.HeadlessChrome; +} + +function detectWebGL({ webGL }) { + if (webGL.state === 0 /* State.Success */) { + const { vendor, renderer } = webGL.value; + if (vendor == 'Brian Paul' && renderer == 'Mesa OffScreen') { + return BotKind.HeadlessChrome; + } + } +} + +function detectWindowExternal({ windowExternal }) { + if (windowExternal.state !== 0 /* State.Success */) + return false; + if (/Sequentum/i.test(windowExternal.value)) + return BotKind.Sequentum; +} + +function detectWindowSize({ windowSize, documentFocus }) { + if (windowSize.state !== 0 /* State.Success */ || documentFocus.state !== 0 /* State.Success */) + return false; + const { outerWidth, outerHeight } = windowSize.value; + // When a page is opened in a new tab without focusing it right away, the window outer size is 0x0 + if (!documentFocus.value) + return; + if (outerWidth === 0 && outerHeight === 0) + return BotKind.HeadlessChrome; +} + +function detectDistinctiveProperties({ distinctiveProps }) { + if (distinctiveProps.state !== 0 /* State.Success */) + return false; + const value = distinctiveProps.value; + let bot; + for (bot in value) + if (value[bot]) + return bot; +} + +// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types +const detectors = { + detectAppVersion, + detectDocumentAttributes, + detectErrorTrace, + detectEvalLengthInconsistency, + detectFunctionBind, + detectLanguagesLengthInconsistency, + detectNotificationPermissions, + detectPluginsArray, + detectPluginsLengthInconsistency, + detectProcess, + detectUserAgent, + detectWebDriver, + detectWebGL, + detectWindowExternal, + detectWindowSize, + detectMimeTypesConsistent, + detectProductSub, + detectDistinctiveProperties, +}; + +function getAppVersion() { + const appVersion = navigator.appVersion; + if (appVersion == undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.appVersion is undefined'); + } + return appVersion; +} + +function getDocumentElementKeys() { + if (document.documentElement === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'document.documentElement is undefined'); + } + const { documentElement } = document; + if (typeof documentElement.getAttributeNames !== 'function') { + throw new BotdError(-2 /* State.NotFunction */, 'document.documentElement.getAttributeNames is not a function'); + } + return documentElement.getAttributeNames(); +} + +function getErrorTrace() { + try { + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + null[0](); + } + catch (error) { + if (error instanceof Error && error['stack'] != null) { + return error.stack.toString(); + } + } + throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'errorTrace signal unexpected behaviour'); +} + +function getEvalLength() { + return eval.toString().length; +} + +function getFunctionBind() { + if (Function.prototype.bind === undefined) { + throw new BotdError(-2 /* State.NotFunction */, 'Function.prototype.bind is undefined'); + } + return Function.prototype.bind.toString(); +} + +function getBrowserEngineKind() { + var _a, _b; + // Based on research in October 2020. Tested to detect Chromium 42-86. + const w = window; + const n = navigator; + if (countTruthy([ + 'webkitPersistentStorage' in n, + 'webkitTemporaryStorage' in n, + n.vendor.indexOf('Google') === 0, + 'webkitResolveLocalFileSystemURL' in w, + 'BatteryManager' in w, + 'webkitMediaStream' in w, + 'webkitSpeechGrammar' in w, + ]) >= 5) { + return "chromium" /* BrowserEngineKind.Chromium */; + } + if (countTruthy([ + 'ApplePayError' in w, + 'CSSPrimitiveValue' in w, + 'Counter' in w, + n.vendor.indexOf('Apple') === 0, + 'getStorageUpdates' in n, + 'WebKitMediaKeys' in w, + ]) >= 4) { + return "webkit" /* BrowserEngineKind.Webkit */; + } + if (countTruthy([ + 'buildID' in navigator, + 'MozAppearance' in ((_b = (_a = document.documentElement) === null || _a === void 0 ? void 0 : _a.style) !== null && _b !== void 0 ? _b : {}), + 'onmozfullscreenchange' in w, + 'mozInnerScreenX' in w, + 'CSSMozDocumentRule' in w, + 'CanvasCaptureMediaStream' in w, + ]) >= 4) { + return "gecko" /* BrowserEngineKind.Gecko */; + } + return "unknown" /* BrowserEngineKind.Unknown */; +} +function getBrowserKind() { + var _a; + const userAgent = (_a = navigator.userAgent) === null || _a === void 0 ? void 0 : _a.toLowerCase(); + if (strIncludes(userAgent, 'edg/')) { + return "edge" /* BrowserKind.Edge */; + } + else if (strIncludes(userAgent, 'trident') || strIncludes(userAgent, 'msie')) { + return "internet_explorer" /* BrowserKind.IE */; + } + else if (strIncludes(userAgent, 'wechat')) { + return "wechat" /* BrowserKind.WeChat */; + } + else if (strIncludes(userAgent, 'firefox')) { + return "firefox" /* BrowserKind.Firefox */; + } + else if (strIncludes(userAgent, 'opera') || strIncludes(userAgent, 'opr')) { + return "opera" /* BrowserKind.Opera */; + } + else if (strIncludes(userAgent, 'chrome')) { + return "chrome" /* BrowserKind.Chrome */; + } + else if (strIncludes(userAgent, 'safari')) { + return "safari" /* BrowserKind.Safari */; + } + else { + return "unknown" /* BrowserKind.Unknown */; + } +} +// Source: https://github.com/fingerprintjs/fingerprintjs/blob/master/src/utils/browser.ts#L223 +function isAndroid() { + const browserEngineKind = getBrowserEngineKind(); + const isItChromium = browserEngineKind === "chromium" /* BrowserEngineKind.Chromium */; + const isItGecko = browserEngineKind === "gecko" /* BrowserEngineKind.Gecko */; + const w = window; + const n = navigator; + const c = 'connection'; + // Chrome removes all words "Android" from `navigator` when desktop version is requested + // Firefox keeps "Android" in `navigator.appVersion` when desktop version is requested + if (isItChromium) { + return (countTruthy([ + !('SharedWorker' in w), + // `typechange` is deprecated, but it's still present on Android (tested on Chrome Mobile 117) + // Removal proposal https://bugs.chromium.org/p/chromium/issues/detail?id=699892 + // Note: this expression returns true on ChromeOS, so additional detectors are required to avoid false-positives + n[c] && 'ontypechange' in n[c], + !('sinkId' in new Audio()), + ]) >= 2); + } + else if (isItGecko) { + return countTruthy(['onorientationchange' in w, 'orientation' in w, /android/i.test(n.appVersion)]) >= 2; + } + else { + // Only 2 browser engines are presented on Android. + // Actually, there is also Android 4.1 browser, but it's not worth detecting it at the moment. + return false; + } +} +function getDocumentFocus() { + if (document.hasFocus === undefined) { + return false; + } + return document.hasFocus(); +} +function isChromium86OrNewer() { + // Checked in Chrome 85 vs Chrome 86 both on desktop and Android. Checked in macOS Chrome 128, Android Chrome 127. + const w = window; + return (countTruthy([ + !('MediaSettingsRange' in w), + 'RTCEncodedAudioFrame' in w, + '' + w.Intl === '[object Intl]', + '' + w.Reflect === '[object Reflect]', + ]) >= 3); +} + +function getLanguages() { + const n = navigator; + const result = []; + const language = n.language || n.userLanguage || n.browserLanguage || n.systemLanguage; + if (language !== undefined) { + result.push([language]); + } + if (Array.isArray(n.languages)) { + const browserEngine = getBrowserEngineKind(); + // Starting from Chromium 86, there is only a single value in `navigator.language` in Incognito mode: + // the value of `navigator.language`. Therefore, the value is ignored in this browser. + if (!(browserEngine === "chromium" /* BrowserEngineKind.Chromium */ && isChromium86OrNewer())) { + result.push(n.languages); + } + } + else if (typeof n.languages === 'string') { + const languages = n.languages; + if (languages) { + result.push(languages.split(',')); + } + } + return result; +} + +function areMimeTypesConsistent() { + if (navigator.mimeTypes === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.mimeTypes is undefined'); + } + const { mimeTypes } = navigator; + let isConsistent = Object.getPrototypeOf(mimeTypes) === MimeTypeArray.prototype; + for (let i = 0; i < mimeTypes.length; i++) { + isConsistent && (isConsistent = Object.getPrototypeOf(mimeTypes[i]) === MimeType.prototype); + } + return isConsistent; +} + +async function getNotificationPermissions() { + if (window.Notification === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'window.Notification is undefined'); + } + if (navigator.permissions === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.permissions is undefined'); + } + const { permissions } = navigator; + if (typeof permissions.query !== 'function') { + throw new BotdError(-2 /* State.NotFunction */, 'navigator.permissions.query is not a function'); + } + try { + const permissionStatus = await permissions.query({ name: 'notifications' }); + return window.Notification.permission === 'denied' && permissionStatus.state === 'prompt'; + } + catch (e) { + throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'notificationPermissions signal unexpected behaviour'); + } +} + +function getPluginsArray() { + if (navigator.plugins === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.plugins is undefined'); + } + if (window.PluginArray === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'window.PluginArray is undefined'); + } + return navigator.plugins instanceof PluginArray; +} + +function getPluginsLength() { + if (navigator.plugins === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.plugins is undefined'); + } + if (navigator.plugins.length === undefined) { + throw new BotdError(-3 /* State.UnexpectedBehaviour */, 'navigator.plugins.length is undefined'); + } + return navigator.plugins.length; +} + +function getProcess() { + const { process } = window; + const errorPrefix = 'window.process is'; + if (process === undefined) { + throw new BotdError(-1 /* State.Undefined */, `${errorPrefix} undefined`); + } + if (process && typeof process !== 'object') { + throw new BotdError(-3 /* State.UnexpectedBehaviour */, `${errorPrefix} not an object`); + } + return process; +} + +function getProductSub() { + const { productSub } = navigator; + if (productSub === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.productSub is undefined'); + } + return productSub; +} + +function getRTT() { + if (navigator.connection === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.connection is undefined'); + } + if (navigator.connection.rtt === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.connection.rtt is undefined'); + } + return navigator.connection.rtt; +} + +function getUserAgent() { + return navigator.userAgent; +} + +function getWebDriver() { + if (navigator.webdriver == undefined) { + throw new BotdError(-1 /* State.Undefined */, 'navigator.webdriver is undefined'); + } + return navigator.webdriver; +} + +function getWebGL() { + const canvasElement = document.createElement('canvas'); + if (typeof canvasElement.getContext !== 'function') { + throw new BotdError(-2 /* State.NotFunction */, 'HTMLCanvasElement.getContext is not a function'); + } + const webGLContext = canvasElement.getContext('webgl'); + if (webGLContext === null) { + throw new BotdError(-4 /* State.Null */, 'WebGLRenderingContext is null'); + } + if (typeof webGLContext.getParameter !== 'function') { + throw new BotdError(-2 /* State.NotFunction */, 'WebGLRenderingContext.getParameter is not a function'); + } + const vendor = webGLContext.getParameter(webGLContext.VENDOR); + const renderer = webGLContext.getParameter(webGLContext.RENDERER); + return { vendor: vendor, renderer: renderer }; +} + +function getWindowExternal() { + if (window.external === undefined) { + throw new BotdError(-1 /* State.Undefined */, 'window.external is undefined'); + } + const { external } = window; + if (typeof external.toString !== 'function') { + throw new BotdError(-2 /* State.NotFunction */, 'window.external.toString is not a function'); + } + return external.toString(); +} + +function getWindowSize() { + return { + outerWidth: window.outerWidth, + outerHeight: window.outerHeight, + innerWidth: window.innerWidth, + innerHeight: window.innerHeight, + }; +} + +function checkDistinctiveProperties() { + // The order in the following list matters, because specific types of bots come first, followed by automation technologies. + const distinctivePropsList = { + [BotKind.Awesomium]: { + window: ['awesomium'], + }, + [BotKind.Cef]: { + window: ['RunPerfTest'], + }, + [BotKind.CefSharp]: { + window: ['CefSharp'], + }, + [BotKind.CoachJS]: { + window: ['emit'], + }, + [BotKind.FMiner]: { + window: ['fmget_targets'], + }, + [BotKind.Geb]: { + window: ['geb'], + }, + [BotKind.NightmareJS]: { + window: ['__nightmare', 'nightmare'], + }, + [BotKind.Phantomas]: { + window: ['__phantomas'], + }, + [BotKind.PhantomJS]: { + window: ['callPhantom', '_phantom'], + }, + [BotKind.Rhino]: { + window: ['spawn'], + }, + [BotKind.Selenium]: { + window: ['_Selenium_IDE_Recorder', '_selenium', 'calledSelenium', /^([a-z]){3}_.*_(Array|Promise|Symbol)$/], + document: ['__selenium_evaluate', 'selenium-evaluate', '__selenium_unwrapped'], + }, + [BotKind.WebDriverIO]: { + window: ['wdioElectron'], + }, + [BotKind.WebDriver]: { + window: [ + 'webdriver', + '__webdriverFunc', + '__lastWatirAlert', + '__lastWatirConfirm', + '__lastWatirPrompt', + '_WEBDRIVER_ELEM_CACHE', + 'ChromeDriverw', + ], + document: [ + '__webdriver_script_fn', + '__driver_evaluate', + '__webdriver_evaluate', + '__fxdriver_evaluate', + '__driver_unwrapped', + '__webdriver_unwrapped', + '__fxdriver_unwrapped', + '__webdriver_script_fn', + '__webdriver_script_func', + '__webdriver_script_function', + '$cdc_asdjflasutopfhvcZLmcf', + '$cdc_asdjflasutopfhvcZLmcfl_', + '$chrome_asyncScriptInfo', + '__$webdriverAsyncExecutor', + ], + }, + [BotKind.HeadlessChrome]: { + window: ['domAutomation', 'domAutomationController'], + }, + }; + let botName; + const result = {}; + const windowProps = getObjectProps(window); + let documentProps = []; + if (window.document !== undefined) + documentProps = getObjectProps(window.document); + for (botName in distinctivePropsList) { + const props = distinctivePropsList[botName]; + if (props !== undefined) { + const windowContains = props.window === undefined ? false : includes(windowProps, ...props.window); + const documentContains = props.document === undefined || !documentProps.length ? false : includes(documentProps, ...props.document); + result[botName] = windowContains || documentContains; + } + } + return result; +} + +const sources = { + android: isAndroid, + browserKind: getBrowserKind, + browserEngineKind: getBrowserEngineKind, + documentFocus: getDocumentFocus, + userAgent: getUserAgent, + appVersion: getAppVersion, + rtt: getRTT, + windowSize: getWindowSize, + pluginsLength: getPluginsLength, + pluginsArray: getPluginsArray, + errorTrace: getErrorTrace, + productSub: getProductSub, + windowExternal: getWindowExternal, + mimeTypesConsistent: areMimeTypesConsistent, + evalLength: getEvalLength, + webGL: getWebGL, + webDriver: getWebDriver, + languages: getLanguages, + notificationPermissions: getNotificationPermissions, + documentElementKeys: getDocumentElementKeys, + functionBind: getFunctionBind, + process: getProcess, + distinctiveProps: checkDistinctiveProperties, +}; + +/** + * Class representing a bot detector. + * + * @class + * @implements {BotDetectorInterface} + */ +class BotDetector { + constructor() { + this.components = undefined; + this.detections = undefined; + } + getComponents() { + return this.components; + } + getDetections() { + return this.detections; + } + /** + * @inheritdoc + */ + detect() { + if (this.components === undefined) { + throw new Error("BotDetector.detect can't be called before BotDetector.collect"); + } + const [detections, finalDetection] = detect(this.components, detectors); + this.detections = detections; + return finalDetection; + } + /** + * @inheritdoc + */ + async collect() { + this.components = await collect(sources); + return this.components; + } +} + +/** + * Sends an unpersonalized AJAX request to collect installation statistics + */ +function monitor() { + // The FingerprintJS CDN (https://github.com/fingerprintjs/cdn) replaces `window.__fpjs_d_m` with `true` + if (window.__fpjs_d_m || Math.random() >= 0.001) { + return; + } + try { + const request = new XMLHttpRequest(); + request.open('get', `https://m1.openfpcdn.io/botd/v${version}/npm-monitoring`, true); + request.send(); + } + catch (error) { + // console.error is ok here because it's an unexpected error handler + // eslint-disable-next-line no-console + console.error(error); + } +} +async function load({ monitoring = true } = {}) { + if (monitoring) { + monitor(); + } + const detector = new BotDetector(); + await detector.collect(); + return detector; +} +var index = { load }; + +export { BotKind, BotdError, collect, index as default, detect, detectors, load, sources }; diff --git a/tests/vendor/fingerprintjs-5.2.0.umd.min.js b/tests/vendor/fingerprintjs-5.2.0.umd.min.js new file mode 100644 index 0000000..9975db8 --- /dev/null +++ b/tests/vendor/fingerprintjs-5.2.0.umd.min.js @@ -0,0 +1,27 @@ +/** + * FingerprintJS v5.2.0 - Copyright (c) FingerprintJS, Inc, 2026 (https://fingerprint.com) + * + * Licensed under MIT License + * + * Copyright (c) 2025 FingerprintJS, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +!function(t,e){"object"==typeof exports&&"undefined"!=typeof module?e(exports):"function"==typeof define&&define.amd?define(["exports"],e):e((t="undefined"!=typeof globalThis?globalThis:t||self).FingerprintJS={})}(this,(function(t){"use strict";var e="5.2.0";function n(t,e){return new Promise((n=>setTimeout(n,t,e)))}function o(t){return!!t&&"function"==typeof t.then}function i(t,e){try{const n=t();o(n)?n.then((t=>e(!0,t)),(t=>e(!1,t))):e(!0,n)}catch(n){e(!1,n)}}async function r(t,e,n=16){const o=Array(t.length);let i=Date.now();for(let r=0;r=i+n&&(i=a,await new Promise((t=>{const e=new MessageChannel;e.port1.onmessage=()=>t(),e.port2.postMessage(null)})))}return o}function a(t){return t.then(void 0,(()=>{})),t}function c(t){return parseInt(t)}function s(t){return parseFloat(t)}function u(t,e){return"number"==typeof t&&isNaN(t)?e:t}function l(t){return t.reduce(((t,e)=>t+(e?1:0)),0)}function d(t,e=1){if(Math.abs(e)>=1)return Math.round(t/e)*e;{const n=1/e;return Math.round(t*n)/n}}function m(t,e){const n=t[0]>>>16,o=65535&t[0],i=t[1]>>>16,r=65535&t[1],a=e[0]>>>16,c=65535&e[0],s=e[1]>>>16;let u=0,l=0,d=0,m=0;m+=r+(65535&e[1]),d+=m>>>16,m&=65535,d+=i+s,l+=d>>>16,d&=65535,l+=o+c,u+=l>>>16,l&=65535,u+=n+a,u&=65535,t[0]=u<<16|l,t[1]=d<<16|m}function f(t,e){const n=t[0]>>>16,o=65535&t[0],i=t[1]>>>16,r=65535&t[1],a=e[0]>>>16,c=65535&e[0],s=e[1]>>>16,u=65535&e[1];let l=0,d=0,m=0,f=0;f+=r*u,m+=f>>>16,f&=65535,m+=i*u,d+=m>>>16,m&=65535,m+=r*s,d+=m>>>16,m&=65535,d+=o*u,l+=d>>>16,d&=65535,d+=i*s,l+=d>>>16,d&=65535,d+=r*c,l+=d>>>16,d&=65535,l+=n*u+o*s+i*c+r*a,l&=65535,t[0]=l<<16|d,t[1]=m<<16|f}function p(t,e){const n=t[0];32===(e%=64)?(t[0]=t[1],t[1]=n):e<32?(t[0]=n<>>32-e,t[1]=t[1]<>>32-e):(e-=32,t[0]=t[1]<>>32-e,t[1]=n<>>32-e)}function h(t,e){0!==(e%=64)&&(e<32?(t[0]=t[1]>>>32-e,t[1]=t[1]<>>1];b(t,e),f(t,y),e[1]=t[0]>>>1,b(t,e),f(t,g),e[1]=t[0]>>>1,b(t,e)}const w=[2277735313,289559509],L=[1291169091,658871167],k=[0,5],V=[0,1390208809],S=[0,944331445];function W(t,e){const n=function(t){const e=new Uint8Array(t.length);for(let n=0;n127)return(new TextEncoder).encode(t);e[n]=o}return e}(t);e=e||0;const o=[0,n.length],i=o[1]%16,r=o[1]-i,a=[0,e],c=[0,e],s=[0,0],u=[0,0];let l;for(l=0;l>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(c[0]>>>0).toString(16)).slice(-8)+("00000000"+(c[1]>>>0).toString(16)).slice(-8)}function x(t){return"function"!=typeof t}function Z(t,e,n,o){const c=Object.keys(t).filter((t=>!function(t,e){for(let n=0,o=t.length;nfunction(t,e){const n=a(new Promise((n=>{const o=Date.now();i(t.bind(null,e),((...t)=>{const e=Date.now()-o;if(!t[0])return n((()=>({error:t[1],duration:e})));const r=t[1];if(x(r))return n((()=>({value:r,duration:e})));n((()=>new Promise((t=>{const n=Date.now();i(r,((...o)=>{const i=e+Date.now()-n;if(!o[0])return t({error:o[1],duration:i});t({value:o[1],duration:i})}))}))))}))})));return function(){return n.then((t=>t()))}}(t[n],e)),o));return async function(){const t=await s,e=await r(t,(t=>a(t())),o),n=await Promise.all(e),i={};for(let o=0;o=4}function R(){const t=window,e=navigator;return l(["msWriteProfilerMark"in t,"MSStream"in t,"msLaunchUri"in e,"msSaveBlob"in e])>=3&&!M()}function F(){const t=window,e=navigator;return l(["webkitPersistentStorage"in e,"webkitTemporaryStorage"in e,0===(e.vendor||"").indexOf("Google"),"webkitResolveLocalFileSystemURL"in t,"BatteryManager"in t,"webkitMediaStream"in t,"webkitSpeechGrammar"in t])>=5}function G(){const t=window;return l(["ApplePayError"in t,"CSSPrimitiveValue"in t,"Counter"in t,0===navigator.vendor.indexOf("Apple"),"RGBColor"in t,"WebKitMediaKeys"in t])>=4}function I(){const t=window,{HTMLElement:e,Document:n}=t;return l(["safari"in t,!("ongestureend"in t),!("TouchEvent"in t),!("orientation"in t),e&&!("autocapitalize"in e.prototype),n&&"pointerLockElement"in n.prototype])>=4}function C(){const t=window;return e=t.print,/^function\s.*?\{\s*\[native code]\s*}$/.test(String(e))&&"[object WebPageNamespace]"===String(t.browser);var e}function Y(){var t,e;const n=window;return l(["buildID"in navigator,"MozAppearance"in(null!==(e=null===(t=document.documentElement)||void 0===t?void 0:t.style)&&void 0!==e?e:{}),"onmozfullscreenchange"in n,"mozInnerScreenX"in n,"CSSMozDocumentRule"in n,"CanvasCaptureMediaStream"in n])>=4}function P(){const{CSS:t}=window;return l([t.supports("selector(::details-content)"),t.supports("selector(::before::marker)"),t.supports("selector(::after::marker)"),!("locale"in CompositionEvent.prototype)])>=3}function X(){const t=window,e=document,{CSS:n,Promise:o,AudioContext:i}=t;return l([o&&"try"in o,"caretPositionFromPoint"in e,i&&"onerror"in i.prototype,n.supports("ruby-align","space-around")])>=3}function j(){const t=window,e=navigator,{CSS:n,HTMLButtonElement:o}=t;return l([!("getStorageUpdates"in e),o&&"popover"in o.prototype,"CSSCounterStyleRule"in t,n.supports("font-size-adjust: ex-height 0.5"),n.supports("text-transform: full-width")])>=4}function E(){const t=document;return t.fullscreenElement||t.msFullscreenElement||t.mozFullScreenElement||t.webkitFullscreenElement||null}function H(){const t=F(),e=Y(),n=window,o=navigator,i="connection";return t?l([!("SharedWorker"in n),o[i]&&"ontypechange"in o[i],!("sinkId"in new Audio)])>=2:!!e&&l(["onorientationchange"in n,"orientation"in n,/android/i.test(o.appVersion)])>=2}function A(){const t=navigator,e=window,n=Audio.prototype,{visualViewport:o}=e;return l(["srLatency"in n,"srChannelCount"in n,"devicePosture"in t,o&&"segments"in o,"getTextInformation"in Image.prototype])>=3}function N(){const t=window,e=t.OfflineAudioContext||t.webkitOfflineAudioContext;if(!e)return-2;if(G()&&!I()&&!function(){const t=window;return l(["DOMRectList"in t,"RTCPeerConnectionIceEvent"in t,"SVGGeometryElement"in t,"ontransitioncancel"in t])>=3}())return-1;const n=new e(1,5e3,44100),i=n.createOscillator();i.type="triangle",i.frequency.value=1e4;const r=n.createDynamicsCompressor();r.threshold.value=-50,r.knee.value=40,r.ratio.value=12,r.attack.value=0,r.release.value=.25,i.connect(r),r.connect(n.destination),i.start(0);const[c,s]=function(t){const e=3,n=500,i=500,r=5e3;let c=()=>{};const s=new Promise(((s,u)=>{let l=!1,d=0,m=0;t.oncomplete=t=>s(t.renderedBuffer);const f=()=>{setTimeout((()=>u(J("timeout"))),Math.min(i,m+r-Date.now()))},p=()=>{try{const i=t.startRendering();switch(o(i)&&a(i),t.state){case"running":m=Date.now(),l&&f();break;case"suspended":document.hidden||d++,l&&d>=e?u(J("suspended")):setTimeout(p,n)}}catch(i){u(i)}};p(),c=()=>{l||(l=!0,m>0&&f())}}));return[s,c]}(n),u=a(c.then((t=>function(t){let e=0;for(let n=0;n{if("timeout"===t.name||"suspended"===t.name)return-3;throw t})));return()=>(s(),u)}function J(t){const e=new Error(t);return e.name=t,e}async function T(t,e,o=50){var i,r,a;const c=document;for(;!c.body;)await n(o);const s=c.createElement("iframe");try{for((await new Promise(((t,n)=>{let o=!1;const i=()=>{o=!0,t()};s.onload=i,s.onerror=t=>{o=!0,n(t)};const{style:r}=s;r.setProperty("display","block","important"),r.position="absolute",r.top="0",r.left="0",r.visibility="hidden",e&&"srcdoc"in s?s.srcdoc=e:s.src="about:blank",c.body.appendChild(s);const a=()=>{var t,e;o||("complete"===(null===(e=null===(t=s.contentWindow)||void 0===t?void 0:t.document)||void 0===e?void 0:e.readyState)?i():setTimeout(a,10))};a()})));!(null===(r=null===(i=s.contentWindow)||void 0===i?void 0:i.document)||void 0===r?void 0:r.body);)await n(o);return await t(s,s.contentWindow)}finally{null===(a=s.parentNode)||void 0===a||a.removeChild(s)}}function _(t){const[e,n]=function(t){var e,n;const o=`Unexpected syntax '${t}'`,i=/^\s*([a-z-]*)(.*)$/i.exec(t),r=i[1]||void 0,a={},c=/([.:#][\w-]+|\[.+?\])/gi,s=(t,e)=>{a[t]=a[t]||[],a[t].push(e)};for(;;){const t=c.exec(i[2]);if(!t)break;const r=t[0];switch(r[0]){case".":s("class",r.slice(1));break;case"#":s("id",r.slice(1));break;case"[":{const t=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(r);if(!t)throw new Error(o);s(t[1],null!==(n=null!==(e=t[4])&&void 0!==e?e:t[5])&&void 0!==n?n:"");break}default:throw new Error(o)}}return[r,a]}(t),o=document.createElement(null!=e?e:"div");for(const i of Object.keys(n)){const t=n[i].join(" ");"style"===i?D(o.style,t):o.setAttribute(i,t)}return o}function D(t,e){for(const n of e.split(";")){const e=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(n);if(e){const[,n,o,,i]=e;t.setProperty(n,o,i||"")}}}const z=["monospace","sans-serif","serif"],B=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function O(t){let e,n,o=!1;const[i,r]=function(){const t=document.createElement("canvas");return t.width=1,t.height=1,[t,t.getContext("2d")]}();return!function(t,e){return!(!e||!t.toDataURL)}(i,r)?e=n="unsupported":(o=function(t){return t.rect(0,0,10,10),t.rect(2,2,6,6),!t.isPointInPath(5,5,"evenodd")}(r),t?e=n="skipped":[e,n]=function(t,e){!function(t,e){t.width=240,t.height=60,e.textBaseline="alphabetic",e.fillStyle="#f60",e.fillRect(100,1,62,20),e.fillStyle="#069",e.font='11pt "Times New Roman"';const n=`Cwm fjordbank gly ${String.fromCharCode(55357,56835)}`;e.fillText(n,2,15),e.fillStyle="rgba(102, 204, 0, 0.2)",e.font="18pt Arial",e.fillText(n,4,45)}(t,e);const n=$(t),o=$(t);if(n!==o)return["unstable","unstable"];!function(t,e){t.width=122,t.height=110,e.globalCompositeOperation="multiply";for(const[n,o,i]of[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]])e.fillStyle=n,e.beginPath(),e.arc(o,i,40,0,2*Math.PI,!0),e.closePath(),e.fill();e.fillStyle="#f9c",e.arc(60,60,60,0,2*Math.PI,!0),e.arc(60,60,20,0,2*Math.PI,!0),e.fill("evenodd")}(t,e);const i=$(t);return[i,n]}(i,r)),{winding:o,geometry:e,text:n}}function $(t){return t.toDataURL()}function U(){const t=screen,e=t=>u(c(t),null),n=[e(t.width),e(t.height)];return n.sort().reverse(),n}const Q=2500;let K,q;function tt(){return function(){if(void 0!==q)return;const t=()=>{const e=et();nt(e)?q=setTimeout(t,Q):(K=e,q=void 0)};t()}(),async()=>{let t=et();if(nt(t)){if(K)return[...K];E()&&(await function(){const t=document;return(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t)}(),t=et())}return nt(t)||(K=t),t}}function et(){const t=screen;return[u(s(t.availTop),null),u(s(t.width)-s(t.availWidth)-u(s(t.availLeft),0),null),u(s(t.height)-s(t.availHeight)-u(s(t.availTop),0),null),u(s(t.availLeft),null)]}function nt(t){for(let e=0;e<4;++e)if(t[e])return!1;return!0}function ot(){return u(c(navigator.hardwareConcurrency),void 0)}function it(t){t.style.setProperty("visibility","hidden","important"),t.style.setProperty("display","block","important")}function rt(t){return matchMedia(`(inverted-colors: ${t})`).matches}function at(t){return matchMedia(`(forced-colors: ${t})`).matches}function ct(t){return matchMedia(`(prefers-contrast: ${t})`).matches}function st(t){return matchMedia(`(prefers-reduced-motion: ${t})`).matches}function ut(t){return matchMedia(`(prefers-reduced-transparency: ${t})`).matches}function lt(t){return matchMedia(`(dynamic-range: ${t})`).matches}const dt=Math,mt=()=>0;const ft="mmMwWLliI0fiflO&1",pt={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};function ht(t){const e=H()?0:3,n=Math.pow(10,e);return Math.floor(t*n)/n}const bt=function(){let t=window;for(;;){const n=t.parent;if(!n||n===t)return!1;try{if(n.location.origin!==t.location.origin)return!0}catch(e){if(e instanceof Error&&"SecurityError"===e.name)return!0;throw e}t=n}};const yt=new Set([10752,2849,2884,2885,2886,2928,2929,2930,2931,2932,2960,2961,2962,2963,2964,2965,2966,2967,2968,2978,3024,3042,3088,3089,3106,3107,32773,32777,32777,32823,32824,32936,32937,32938,32939,32968,32969,32970,32971,3317,33170,3333,3379,3386,33901,33902,34016,34024,34076,3408,3410,3411,3412,3413,3414,3415,34467,34816,34817,34818,34819,34877,34921,34930,35660,35661,35724,35738,35739,36003,36004,36005,36347,36348,36349,37440,37441,37443,7936,7937,7938]),gt=new Set([34047,35723,36063,34852,34853,34854,34229,36392,36795,38449]),vt=["FRAGMENT_SHADER","VERTEX_SHADER"],wt=["LOW_FLOAT","MEDIUM_FLOAT","HIGH_FLOAT","LOW_INT","MEDIUM_INT","HIGH_INT"],Lt="WEBGL_debug_renderer_info";function kt(t){if(t.webgl)return t.webgl.context;const e=document.createElement("canvas");let n;e.addEventListener("webglCreateContextError",(()=>n=void 0));for(const i of["webgl","experimental-webgl"]){try{n=e.getContext(i)}catch(o){}if(n)break}return t.webgl={context:n},n}function Vt(t,e,n){const o=t.getShaderPrecisionFormat(t[e],t[n]);return o?[o.rangeMin,o.rangeMax,o.precision]:[]}function St(t){return Object.keys(t.__proto__).filter(Wt)}function Wt(t){return"string"==typeof t&&!t.match(/[^A-Z0-9_x]/)}function xt(){return Y()}function Zt(t){return"function"==typeof t.getParameter}const Mt={userAgentData:async function(){const t=navigator.userAgentData;if(!t)return;const e=t.brands.filter((({brand:t})=>!function(t){return/not/i.test(t)}(t))).map((({brand:t})=>t)),n={brands:e.length>1?e.filter((t=>"Chromium"!==t)):e,mobile:t.mobile,platform:t.platform};if(t.getHighEntropyValues)try{const e=await t.getHighEntropyValues(["architecture","bitness","model","platformVersion"]);n.architecture=e.architecture,n.bitness=e.bitness,n.model=e.model,n.platformVersion=e.platformVersion}catch(o){if(!(o instanceof DOMException&&"NotAllowedError"===o.name))throw o;n.highEntropyStatus="not_allowed"}return n},fonts:function(){return T((async(t,{document:e})=>{const n=e.body;n.style.fontSize="48px";const o=e.createElement("div");o.style.setProperty("visibility","hidden","important");const i={},r={},a=t=>{const n=e.createElement("span"),{style:i}=n;return i.position="absolute",i.top="0",i.left="0",i.fontFamily=t,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},c=(t,e)=>a(`'${t}',${e}`),s=z.map(a),u=(()=>{const t={};for(const e of B)t[e]=z.map((t=>c(e,t)));return t})();n.appendChild(o);for(let l=0;l{return e=u[t],z.some(((t,n)=>e[n].offsetWidth!==i[t]||e[n].offsetHeight!==r[t]));var e}))}))},domBlockers:async function({debug:t}={}){if(!G()&&!H())return;const e=function(){const t=atob;return{abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',t("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",t("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",t("LnNwb25zb3JpdA=="),".ylamainos",t("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),t("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",t("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",t("LmhlYWRlci1ibG9ja2VkLWFk"),t("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",t("I2FkXzMwMFgyNTA="),t("I2Jhbm5lcmZsb2F0MjI="),t("I2NhbXBhaWduLWJhbm5lcg=="),t("I0FkLUNvbnRlbnQ=")],adGuardChinese:[t("LlppX2FkX2FfSA=="),t("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",t("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),t("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",t("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",t("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="leaderboard"]'],adGuardJapanese:["#kauli_yad_1",t("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),t("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),t("LmFkZ29vZ2xl"),t("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[t("YW1wLWF1dG8tYWRz"),t("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",t("I2FkX2ludmlld19hcmVh")],adGuardRussian:[t("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),t("LnJlY2xhbWE="),'div[id^="smi2adblock"]',t("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[t("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),t("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",t("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),t("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),t("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",t("I3Jla2xhbWk="),t("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),t("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),t("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[t("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",t("LndpZGdldF9wb19hZHNfd2lkZ2V0"),t("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",t("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[t("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),t("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",t("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",t("I3Jla2xhbW5pLWJveA=="),t("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",t("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[t("I2FkdmVydGVudGll"),t("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",t("YVtocmVmXj0iaHR0cHM6Ly94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",t("LnNwb25zb3JsaW5rZ3J1ZW4="),t("I3dlcmJ1bmdza3k="),t("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),t("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[t("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",t("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),t("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),t("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[t("LnJla2xhbW9zX3RhcnBhcw=="),t("LnJla2xhbW9zX251b3JvZG9z"),t("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),t("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),t("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[t("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[t("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),t("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",t("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[t("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),t("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),t("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",t("LmFkX19tYWlu"),t("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[t("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[t("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),t("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[t("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),t("I2xpdmVyZUFkV3JhcHBlcg=="),t("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),t("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[t("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",t("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),t("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),t("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[t("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),t("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),t("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",t("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),t("YVtocmVmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),t("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),t("ZGl2I3NrYXBpZWNfYWQ=")],ro:[t("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),t("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),t("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),t("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[t("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),t("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),t("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",t("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),t("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",t("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]}}(),o=Object.keys(e),i=[].concat(...o.map((t=>e[t]))),r=await async function(t){var e;const o=document,i=o.createElement("div"),r=new Array(t.length),a={};it(i);for(let n=0;n{const n=e[t];return l(n.map((t=>r[t])))>.6*n.length}));return a.sort(),a},fontPreferences:function(){return function(t,e=4e3){return T(((n,o)=>{const i=o.document,r=i.body,a=r.style;a.width=`${e}px`,a.webkitTextSizeAdjust=a.textSizeAdjust="none",F()?r.style.zoom=""+1/o.devicePixelRatio:G()&&(r.style.zoom="reset");const c=i.createElement("div");return c.textContent=[...Array(e/20|0)].map((()=>"word")).join(" "),r.appendChild(c),t(i,r,o)}),'')}(((t,e,n)=>{const o={},i={};for(const a of Object.keys(pt)){const[n={},i=ft]=pt[a],r=t.createElement("span");r.textContent=i,r.style.whiteSpace="nowrap";for(const t of Object.keys(n)){const e=n[t];void 0!==e&&(r.style[t]=e)}o[a]=r,e.append(t.createElement("br"),r)}const r=F()&&X();for(const a of Object.keys(pt)){const t=o[a].getBoundingClientRect().width;i[a]=r?ht(t*n.devicePixelRatio):t}return i}))},audio:function(){return G()&&j()&&C()||F()&&A()&&function(){const t=window,{URLPattern:e}=t;return l(["union"in Set.prototype,"Iterator"in t,e&&"hasRegExpGroups"in e.prototype,"RGB8"in WebGLRenderingContext.prototype])>=3}()?-4:N()},screenFrame:function(){const t=G()&&j()&&C(),e=Y()&&P();if(t||e)return()=>Promise.resolve(void 0);const n=tt();return async()=>{const t=await n(),e=t=>null===t?null:d(t,10);return[e(t[0]),e(t[1]),e(t[2]),e(t[3])]}},canvas:function(){return O(function(){const t=G()&&j()&&C(),e=Y()&&function(){const t=window,e=navigator,{CSS:n}=t;return l(["userActivation"in e,n.supports("color","light-dark(#000, #fff)"),n.supports("height","1lh"),"globalPrivacyControl"in e])>=3}();return t||e}())},osCpu:function(){return navigator.oscpu},languages:function(){const t=navigator,e=[],n=t.language||t.userLanguage||t.browserLanguage||t.systemLanguage;if(void 0!==n&&e.push([n]),Array.isArray(t.languages))F()&&function(){const t=window;return l([!("MediaSettingsRange"in t),"RTCEncodedAudioFrame"in t,""+t.Intl=="[object Intl]",""+t.Reflect=="[object Reflect]"])>=3}()||e.push(t.languages);else if("string"==typeof t.languages){const n=t.languages;n&&e.push(n.split(","))}return e},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return u(s(navigator.deviceMemory),void 0)},screenResolution:function(){if(!(G()&&j()&&C()))return U()},hardwareConcurrency:function(){const t=ot();return void 0!==t&&Y()&&P()?t>=8?8:4:t},timezone:function(){var t;const e=null===(t=window.Intl)||void 0===t?void 0:t.DateTimeFormat;if(e){const t=(new e).resolvedOptions().timeZone;if(t)return t}const n=-function(){const t=(new Date).getFullYear();return Math.max(s(new Date(t,0,1).getTimezoneOffset()),s(new Date(t,6,1).getTimezoneOffset()))}();return`UTC${n>=0?"+":""}${n}`},sessionStorage:function(){try{return!!window.sessionStorage}catch(t){return!0}},localStorage:function(){try{return!!window.localStorage}catch(t){return!0}},indexedDB:function(){if(!M()&&!R())try{return!!window.indexedDB}catch(t){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){const{platform:t}=navigator;return"MacIntel"===t&&G()&&!I()?function(){if("iPad"===navigator.platform)return!0;const t=screen,e=t.width/t.height;return l(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,e>.65&&e<1.53])>=2}()?"iPad":"iPhone":t},plugins:function(){const t=navigator.plugins;if(!t)return;const e=[];for(let n=0;ndt.log(t+dt.sqrt(t*t+1)))(1),atanh:i(.5),atanhPf:(t=>dt.log((1+t)/(1-t))/2)(.5),atan:r(.5),sin:a(-1e300),sinh:c(1),sinhPf:(t=>dt.exp(t)-1/dt.exp(t)/2)(1),cos:s(10.000000000123),cosh:u(1),coshPf:(t=>(dt.exp(t)+1/dt.exp(t))/2)(1),tan:l(-1e300),tanh:d(1),tanhPf:(t=>(dt.exp(2*t)-1)/(dt.exp(2*t)+1))(1),exp:m(1),expm1:f(1),expm1Pf:(t=>dt.exp(t)-1)(1),log1p:p(10),log1pPf:(t=>dt.log(1+t))(10),powPI:(t=>dt.pow(dt.PI,t))(-100)};var h},pdfViewerEnabled:function(){return navigator.pdfViewerEnabled},architecture:function(){const t=new Float32Array(1),e=new Uint8Array(t.buffer);return t[0]=1/0,t[0]=t[0]-t[0],e[3]},applePay:function(){const{ApplePaySession:t}=window;if("function"!=typeof(null==t?void 0:t.canMakePayments))return-1;if(bt())return-3;try{return t.canMakePayments()?1:0}catch(e){return function(t){if(t instanceof Error&&"InvalidAccessError"===t.name&&/\bfrom\b.*\binsecure\b/i.test(t.message))return-2;throw t}(e)}},privateClickMeasurement:function(){var t;const e=document.createElement("a"),n=null!==(t=e.attributionSourceId)&&void 0!==t?t:e.attributionsourceid;return void 0===n?void 0:String(n)},audioBaseLatency:function(){if(!(H()||G()))return-2;if(!window.AudioContext)return-1;const t=(new AudioContext).baseLatency;return null==t?-1:isFinite(t)?t:-3},dateTimeLocale:function(){if(!window.Intl)return-1;const t=window.Intl.DateTimeFormat;if(!t)return-2;const e=t().resolvedOptions().locale;return e||""===e?e:-3},webGlBasics:function({cache:t}){var e,n,o,i,r,a;const c=kt(t);if(!c)return-1;if(!Zt(c))return-2;const s=xt()?null:c.getExtension(Lt);return{version:(null===(e=c.getParameter(c.VERSION))||void 0===e?void 0:e.toString())||"",vendor:(null===(n=c.getParameter(c.VENDOR))||void 0===n?void 0:n.toString())||"",vendorUnmasked:s?null===(o=c.getParameter(s.UNMASKED_VENDOR_WEBGL))||void 0===o?void 0:o.toString():"",renderer:(null===(i=c.getParameter(c.RENDERER))||void 0===i?void 0:i.toString())||"",rendererUnmasked:s?null===(r=c.getParameter(s.UNMASKED_RENDERER_WEBGL))||void 0===r?void 0:r.toString():"",shadingLanguageVersion:(null===(a=c.getParameter(c.SHADING_LANGUAGE_VERSION))||void 0===a?void 0:a.toString())||""}},webGlExtensions:function({cache:t}){const e=kt(t);if(!e)return-1;if(!Zt(e))return-2;const n=e.getSupportedExtensions(),o=e.getContextAttributes(),i=[],r=[],a=[],c=[],s=[];if(o)for(const l of Object.keys(o))r.push(`${l}=${o[l]}`);const u=St(e);for(const l of u){const t=e[l];a.push(`${l}=${t}${yt.has(t)?`=${e.getParameter(t)}`:""}`)}if(n)for(const l of n){if(l===Lt&&xt()||"WEBGL_polygon_mode"===l&&(F()||G()))continue;const t=e.getExtension(l);if(t)for(const n of St(t)){const o=t[n];c.push(`${n}=${o}${gt.has(o)?`=${e.getParameter(o)}`:""}`)}else i.push(l)}for(const l of vt)for(const t of wt){const n=Vt(e,l,t);s.push(`${l}.${t}=${n.join(",")}`)}return c.sort(),a.sort(),{contextAttributes:r,parameters:a,shaderPrecisions:s,extensions:n,extensionParameters:c,unsupportedExtensions:i}}};const Rt="$ if upgrade to Pro: https://fingerprint.com/github/?utm_source=oss&utm_medium=referral&utm_campaign=confidence_score";function Ft(t){const e=function(t){if(H())return.4;if(G())return!I()||j()&&C()?.3:.5;const e="value"in t.platform?t.platform.value:"";if(/^Win/.test(e))return.6;if(/^Mac/.test(e))return.5;return.7}(t),n=function(t){return d(.99+.01*t,1e-4)}(e);return{score:e,comment:Rt.replace(/\$/g,`${n}`)}}function Gt(t){return JSON.stringify(t,((t,e)=>{return e instanceof Error?{name:(n=e).name,message:n.message,stack:null===(o=n.stack)||void 0===o?void 0:o.split("\n"),...n}:e;var n,o}),2)}function It(t){return W(function(t){let e="";for(const n of Object.keys(t).sort()){const o=t[n],i="error"in o?"error":JSON.stringify(o.value);e+=`${e?"|":""}${n.replace(/([:|\\])/g,"\\$1")}:${i}`}return e}(t))}function Ct(t=50){return function(t,e=1/0){const{requestIdleCallback:o}=window;return o?new Promise((t=>o.call(window,(()=>t()),{timeout:e}))):n(Math.min(t,e))}(t,2*t)}function Yt(t,n){const o=Date.now();return{async get(i){const r=Date.now(),a=await t(),c=function(t){let n;const o=Ft(t);return{get visitorId(){return void 0===n&&(n=It(this.components)),n},set visitorId(t){n=t},confidence:o,components:t,version:e}}(a);return(n||(null==i?void 0:i.debug))&&console.log(`Copy the text below to get the debug data:\n\n\`\`\`\nversion: ${c.version}\nuserAgent: ${navigator.userAgent}\ntimeBetweenLoadAndGet: ${r-o}\nvisitorId: ${c.visitorId}\ncomponents: ${Gt(a)}\n\`\`\``),c}}}async function Pt(t={}){const{delayFallback:n,debug:o,monitoring:i=!0}=t;i&&function(){if(!(window.__fpjs_d_m||Math.random()>=.001))try{const t=new XMLHttpRequest;t.open("get",`https://m1.openfpcdn.io/fingerprintjs/v${e}/npm-monitoring`,!0),t.send()}catch(t){console.error(t)}}(),await Ct(n);const r=function(t){return Z(Mt,t,[])}({cache:{},debug:o});return Yt(r,o)}var Xt={load:Pt,hashComponents:It,componentsToDebugString:Gt};const jt=W;t.componentsToDebugString=Gt,t.default=Xt,t.getFullscreenElement=E,t.getUnstableAudioFingerprint=N,t.getUnstableCanvasFingerprint=O,t.getUnstableHardwareConcurrency=ot,t.getUnstableScreenFrame=tt,t.getUnstableScreenResolution=U,t.getWebGLContext=kt,t.hashComponents=It,t.isAndroid=H,t.isChromium=F,t.isDesktopWebKit=I,t.isEdgeHTML=R,t.isGecko=Y,t.isSamsungInternet=A,t.isTrident=M,t.isWebKit=G,t.load=Pt,t.loadSources=Z,t.murmurX64Hash128=jt,t.prepareForSources=Ct,t.sources=Mt,t.transformSource=function(t,e){const n=t=>x(t)?e(t):()=>{const n=t();return o(n)?n.then(e):e(n)};return e=>{const i=t(e);return o(i)?i.then(n):n(i)}},t.withIframe=T,Object.defineProperty(t,"__esModule",{value:!0})}));