diff --git a/.github/ISSUE_TEMPLATE/01-launch-failure.yml b/.github/ISSUE_TEMPLATE/01-launch-failure.yml deleted file mode 100644 index 2c5451f..0000000 --- a/.github/ISSUE_TEMPLATE/01-launch-failure.yml +++ /dev/null @@ -1,98 +0,0 @@ -name: Launch failure -description: Browser or wrapper fails to start (install errors, missing deps, profile load fails, never reaches new_page) -title: "[launch] " -labels: ["bug", "launch-failure"] -body: - - type: markdown - attributes: - value: | - Use this when the browser never reaches a usable state. - If it starts and the bug appears on a site or clicking something, use the site/action template instead. - - - type: input - id: version - attributes: - label: Version - description: Output of `python -m invisible_playwright version`. - placeholder: 0.1.7 (binary firefox-7) - validations: - required: true - - - type: dropdown - id: os - attributes: - label: OS - options: - - Windows 10/11 x86_64 - - Linux x86_64 - - macOS (unsupported) - - Other - validations: - required: true - - - type: input - id: python - attributes: - label: Python - placeholder: 3.11.7 - validations: - required: true - - - type: input - id: install_cmd - attributes: - label: How you installed - placeholder: pip install invisible_playwright - validations: - required: true - - - type: textarea - id: snippet - attributes: - label: What you ran - description: Stop at the line that errors out. Redact creds. - render: python - value: | - from invisible_playwright import InvisiblePlaywright - with InvisiblePlaywright(seed=42) as browser: - ctx = browser.new_context() - validations: - required: true - - - type: textarea - id: traceback - attributes: - label: Full traceback - description: The whole stack trace verbatim. Don't summarize. - render: text - validations: - required: true - - - type: textarea - id: logs - attributes: - label: Extra logs - description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1`. Optional but speeds things up. 
- render: text - validations: - required: false - - - type: textarea - id: tried - attributes: - label: What you already tried - description: Reinstall, clear cache, different Python version, different proxy, etc. - validations: - required: false - - - type: checkboxes - id: confirm - attributes: - label: Before submitting - options: - - label: Searched existing issues. - required: true - - label: On the latest released version. - required: true - - label: Removed credentials and personal paths from the snippet and logs. - required: true diff --git a/.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml b/.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml deleted file mode 100644 index 6c38de6..0000000 --- a/.github/ISSUE_TEMPLATE/02-site-or-action-bug.yml +++ /dev/null @@ -1,167 +0,0 @@ -name: Site or action bug -description: Browser starts fine but a navigation, click, evaluate, or other operation fails or behaves wrong -title: "[bug] " -labels: ["bug"] -body: - - type: markdown - attributes: - value: | - For bugs that happen after the browser is up. - If the browser never launches, use the launch failure template. - If a fingerprint detector flags the browser, use the stealth detection template. - - - type: input - id: version - attributes: - label: Version - description: Output of `python -m invisible_playwright version`. - placeholder: 0.1.7 (binary firefox-7) - validations: - required: true - - - type: dropdown - id: os - attributes: - label: OS - options: - - Windows 10/11 x86_64 - - Linux x86_64 - - macOS (unsupported) - - Other - validations: - required: true - - - type: input - id: python - attributes: - label: Python - placeholder: 3.11.7 - validations: - required: true - - - type: dropdown - id: headless - attributes: - label: headless= - description: Some bugs only repro on Windows headless=True (hidden alt-desktop path). 
- options: - - "True" - - "False" - validations: - required: true - - - type: dropdown - id: proxy - attributes: - label: Proxy - description: Sites often vary by IP geo (e.g. GDPR consent shows only on UK/EU). - options: - - No proxy (host network) - - Residential, UK/GB - - Residential, US - - Residential, other country (specify in notes) - - Datacenter (specify provider in notes) - validations: - required: true - - - type: dropdown - id: profile - attributes: - label: Profile dir - options: - - Fresh each run (no profile_dir) - - Persistent profile_dir, reusing across runs - - Persistent profile_dir, first run creating it - validations: - required: true - - - type: input - id: url - attributes: - label: URL - description: The exact URL passed to `page.goto`. Not "the homepage" — the literal string. - placeholder: https://id.sky.com/ - validations: - required: true - - - type: textarea - id: snippet - attributes: - label: Runnable reproduction - description: A complete snippet we can copy, paste, run. Stub creds with placeholders, keep everything else literal. - render: python - value: | - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, headless=True) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto("https://example.com/") - # the exact operation that fails: - page.click("button:has-text('Accept all')") - validations: - required: true - - - type: input - id: selector - attributes: - label: Selector or locator - description: The exact string passed to locator/click/frame_locator. Write N/A if not a selector bug. - placeholder: page.frame_locator("iframe[id^='sp_message_iframe_']").get_by_text("Accept all") - validations: - required: true - - - type: textarea - id: expected - attributes: - label: Expected - description: What should happen when the snippet runs? - validations: - required: true - - - type: textarea - id: actual - attributes: - label: Actual - description: What happens instead? 
Full traceback, error string verbatim, any page.on('crash') firing. - validations: - required: true - - - type: textarea - id: screenshot - attributes: - label: Screenshot - description: Drag-drop a screenshot if the bug is visual. Optional but useful. - validations: - required: false - - - type: textarea - id: logs - attributes: - label: Browser logs - description: Output of `DEBUG=pw:browser* python yourscript.py 2>&1 | tail -200`. Redact creds and real IPs. - render: text - validations: - required: false - - - type: textarea - id: notes - attributes: - label: Notes - description: Anything else, hypotheses, related issues, things you've already tried. - validations: - required: false - - - type: checkboxes - id: confirm - attributes: - label: Before submitting - options: - - label: Searched existing issues. - required: true - - label: On the latest released version. - required: true - - label: The snippet above runs end-to-end on a clean Python install. - required: true - - label: Removed credentials, proxy passwords, real IPs, personal file paths. - required: true diff --git a/.github/ISSUE_TEMPLATE/03-stealth-detection.yml b/.github/ISSUE_TEMPLATE/03-stealth-detection.yml deleted file mode 100644 index b2c5e1d..0000000 --- a/.github/ISSUE_TEMPLATE/03-stealth-detection.yml +++ /dev/null @@ -1,141 +0,0 @@ -name: Stealth detection -description: A fingerprint detector flagged the browser as a bot, VM, VPN, anti-detect, tampered, or otherwise non-human -title: "[detect] " -labels: ["bug", "stealth"] -body: - - type: markdown - attributes: - value: | - Use this when something detects the browser (Fingerprint Pro, CreepJS, BotD, reCAPTCHA, Cloudflare, sannysoft, etc). - Bugs in operations (clicks, navigation) go to the site/action template. - Browser failing to start goes to the launch failure template. 
- - - type: input - id: version - attributes: - label: Version - placeholder: 0.1.7 (binary firefox-7) - validations: - required: true - - - type: dropdown - id: os - attributes: - label: OS - options: - - Windows 10/11 x86_64 - - Linux x86_64 - - macOS (unsupported) - - Other - validations: - required: true - - - type: dropdown - id: headless - attributes: - label: headless= - options: - - "True" - - "False" - validations: - required: true - - - type: dropdown - id: proxy - attributes: - label: Proxy - description: Datacenter or wrong-country proxies trip most detectors regardless of the browser. Be honest about what you used. - options: - - No proxy (host network) - - Residential, matching target geo - - Residential, different geo than target - - Datacenter (specify provider in notes) - - Mobile / 4G - validations: - required: true - - - type: input - id: detector - attributes: - label: Detector name and URL - description: Exact site / service / product that flagged us. - placeholder: Fingerprint Pro — https://demo.fingerprint.com/playground - validations: - required: true - - - type: textarea - id: scores - attributes: - label: Detector verdict - description: Paste the relevant flags / scores verbatim. For Fingerprint Pro paste `bot`, `vpn`, `virtual_machine`, `tampering*`, `vm_ml_score`, `suspect_score`. For CreepJS the headless / lies / trust scores. For reCAPTCHA v3 the score number. - render: text - placeholder: | - bot: bad - vpn: true - virtual_machine: true - vm_ml_score: 0.74 - suspect_score: 22 - validations: - required: true - - - type: textarea - id: screenshot - attributes: - label: Screenshot of the detector result - description: Drag-drop a screenshot of the detector page so we see what you see. - validations: - required: true - - - type: textarea - id: snippet - attributes: - label: How you launched - description: The InvisiblePlaywright launch + navigation that produced the result above. Redact creds. 
- render: python - value: | - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, headless=True) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto("https://demo.fingerprint.com/playground") - validations: - required: true - - - type: textarea - id: expected - attributes: - label: What you expected - description: Most detectors will never give a perfect score for any browser. Tell us what threshold you'd accept (e.g. bot=not_detected, vm_ml_score < 0.3). - validations: - required: true - - - type: textarea - id: full_report - attributes: - label: Full detector response - description: For Fingerprint Pro paste the JSON from /api/event/v4/ if you have it. For CreepJS paste the full Smart Signals block. Optional but speeds things up a lot. - render: json - validations: - required: false - - - type: textarea - id: notes - attributes: - label: Notes - validations: - required: false - - - type: checkboxes - id: confirm - attributes: - label: Before submitting - options: - - label: Searched existing issues. - required: true - - label: On the latest released version. - required: true - - label: The detector verdict above is from a real run, not a hypothesis. - required: true - - label: Removed credentials, real IPs, FpJS visitor_id values, personal file paths from the snippet and full report. - required: true diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..805d579 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,79 @@ +name: Bug report +description: Report a bug in the invisible_playwright Python wrapper +title: "[bug] " +labels: ["bug"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to file a bug report. + + Before continuing, please: + - Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues?q=is%3Aissue) to avoid duplicates. 
+ - If the bug is in the **patched Firefox itself** (canvas/WebGL/audio/font spoofing, a detector flagging the browser), open it at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox/issues) instead. + - **Do not** report security vulnerabilities here — follow [SECURITY.md](https://github.com/feder-cr/invisible_playwright/blob/main/SECURITY.md). + - type: input + id: version + attributes: + label: invisible_playwright version + description: Output of `invisible_playwright version` + placeholder: "0.1.0 (binary 150.0.1)" + validations: + required: true + - type: dropdown + id: os + attributes: + label: Operating system + options: + - Windows x86_64 + - Linux x86_64 + - Other (please specify in description) + validations: + required: true + - type: input + id: python + attributes: + label: Python version + placeholder: "3.11.7" + validations: + required: true + - type: textarea + id: repro + attributes: + label: Minimal reproduction + description: A small, self-contained code snippet that triggers the bug. Strip out anything unrelated. + render: python + validations: + required: true + - type: textarea + id: expected + attributes: + label: Expected behavior + validations: + required: true + - type: textarea + id: actual + attributes: + label: Actual behavior + description: Include the full error message and traceback if any. + validations: + required: true + - type: textarea + id: logs + attributes: + label: Logs / additional context + description: Browser console output, environment variables, proxy config (redact credentials), etc. + render: text + validations: + required: false + - type: checkboxes + id: confirm + attributes: + label: Confirmations + options: + - label: I have searched existing issues and this bug has not been reported. + required: true + - label: I am on the latest release. + required: true + - label: I have removed any credentials, proxy passwords, or sensitive data from logs. 
+ required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 44f31be..6d3dace 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -3,9 +3,9 @@ contact_links: - name: Security vulnerability url: https://github.com/feder-cr/invisible_playwright/security/advisories/new about: Report a security issue privately. Do NOT open a public issue. - - name: Bug in the patched Firefox source (C++, IDL, Juggler JS) - url: https://github.com/feder-cr/invisible_firefox/issues - about: Source-level patches in the Firefox fork go in the invisible_firefox repo. Detection results (FpJS, CreepJS, etc.) use the stealth detection template here. + - name: Bug in the patched Firefox itself (canvas / WebGL / fonts / WebRTC / etc.) + url: https://github.com/feder-cr/invisible-firefox/issues + about: Spoofing/fingerprint bugs belong in the invisible-firefox repo. - name: Question or general discussion url: https://github.com/feder-cr/invisible_playwright/discussions - about: Usage questions, ideas, chat. Bugs and features still go in issues. + about: For usage questions, ideas, and chat. Bugs and features still go in issues. diff --git a/.github/workflows/firefox-launch-matrix.yml b/.github/workflows/firefox-launch-matrix.yml deleted file mode 100644 index 4e7b053..0000000 --- a/.github/workflows/firefox-launch-matrix.yml +++ /dev/null @@ -1,106 +0,0 @@ -name: firefox-launch-matrix - -# Cross-Windows-edition smoke for the shipped firefox-N binary. -# Triggered by issue #22 (firefox-7 SxS mismatch on Win11 build 26200, -# reporter `jannusdorfer-create`). -# -# Runs the exact reporter snippet on every Windows runner GitHub offers, -# from a fresh checkout. If any matrix cell fails the same way, the bug -# is reproducible on at least one clean-ish environment and we ship a -# sidecar mozglue.manifest fix. If all cells pass, the bug is confined -# to the reporter's specific environment (Pro/Enterprise GPO, EDR, etc.). 
- -on: - workflow_dispatch: - push: - branches: [main] - paths: - - '.github/workflows/firefox-launch-matrix.yml' - -jobs: - smoke: - name: launch (${{ matrix.os }}, py${{ matrix.python }}) - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [windows-2022, windows-2025, windows-latest] - python: ["3.11", "3.12", "3.13"] - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - cache: pip - - - name: Windows edition + build info - shell: pwsh - run: | - $os = Get-CimInstance Win32_OperatingSystem - Write-Host "Caption : $($os.Caption)" - Write-Host "BuildNumber: $($os.BuildNumber)" - Write-Host "OSArch : $($os.OSArchitecture)" - Write-Host "Edition : $((Get-CimInstance Win32_OperatingSystem).OperatingSystemSKU)" - Write-Host "---" - Write-Host "VC++ Redistributables installed:" - Get-ItemProperty 'HKLM:\SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\*' ` - -ErrorAction SilentlyContinue | - Where-Object { $_.DisplayName -like '*Visual C++*Redist*' } | - Select-Object DisplayName, DisplayVersion | - Format-Table -AutoSize - - - name: Install package from this commit - run: | - python -m pip install --upgrade pip - pip install . - - - name: Fetch firefox-7 binary - run: python -m invisible_playwright fetch - - - name: Verify firefox.exe can launch standalone (the snippet that fails for issue #22) - shell: pwsh - run: | - # The platformdirs path has the duplicated `invisible-playwright` segment - # on Windows (user_cache_dir convention). - $ffPath = "$env:LOCALAPPDATA\invisible-playwright\invisible-playwright\Cache\firefox-7\firefox.exe" - if (-not (Test-Path $ffPath)) { - Write-Error "firefox.exe NOT FOUND at $ffPath" - exit 1 - } - Write-Host "Launching: $ffPath --version" - # NOTE: firefox.exe --version on Windows prints the version but may - # return non-zero exit code (sub-process fork quirk). Check stdout. 
- $output = & $ffPath --version 2>&1 | Out-String - Write-Host "Output: $output" - if ($output -notmatch 'Mozilla Firefox \d') { - Write-Error "firefox.exe --version did not print a Mozilla Firefox version. Output was: $output" - exit 1 - } - Write-Host "OK: firefox.exe runs and prints version." - - - name: Run reporter's exact InvisiblePlaywright snippet - run: | - python -c " - import asyncio - from invisible_playwright.async_api import InvisiblePlaywright - async def main(): - async with InvisiblePlaywright(seed=9128) as browser: - page = await browser.new_page() - await page.goto('about:blank') - print('OK: page loaded, url =', page.url) - asyncio.run(main()) - " - - - name: Upload diagnostics on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: launch-failure-${{ matrix.os }}-py${{ matrix.python }} - path: | - ${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/firefox.exe - ${{ env.LOCALAPPDATA }}/invisible-playwright/invisible-playwright/Cache/firefox-7/mozglue.dll - if-no-files-found: warn - retention-days: 7 diff --git a/.github/workflows/webrtc-e2e.yml b/.github/workflows/webrtc-e2e.yml deleted file mode 100644 index d14b8ce..0000000 --- a/.github/workflows/webrtc-e2e.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: webrtc-e2e - -# Live WebRTC realness check against the shipped patched binary. -# -# Manual (workflow_dispatch) on purpose: it needs a firefox-N binary that -# carries the WebRTC fixes (synthetic srflx in genuine nICEr form + the -# default-route fallback behind a proxy). Run it after publishing such a -# binary — it is the release gate for "WebRTC looks real behind a proxy". -# Until that binary ships, test_not_blocked_behind_tcp_only_socks is EXPECTED -# to fail (the old binary is fully blocked behind a SOCKS proxy), which is the -# whole point of the gate. 
-# -# No smartproxy / credentials: the "behind a proxy" condition is faked by an -# in-process TCP-only SOCKS5 server (refuses UDP ASSOCIATE) and the egress IP -# is injected as an RFC 5737 TEST-NET address. Fully self-contained. - -on: - workflow_dispatch: - -jobs: - webrtc-e2e: - name: webrtc realness (ubuntu, py3.12) - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python 3.12 - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: pip - - - name: Install package + dev extras - run: | - python -m pip install --upgrade pip - pip install -e ".[dev]" - - - name: Fetch the patched Firefox binary - run: python -m invisible_playwright fetch - - - name: Resolve binary path - run: echo "STEALTHFOX_E2E_BINARY=$(python -m invisible_playwright path)" >> "$GITHUB_ENV" - - - name: Run WebRTC realness e2e (xvfb for the headless Firefox) - run: | - sudo apt-get update && sudo apt-get install -y xvfb - xvfb-run -a pytest tests/test_webrtc_realness.py -m e2e -o addopts="" -v -rs diff --git a/CHANGELOG.md b/CHANGELOG.md index f142d90..731f740 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,46 +6,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] -### Added -- `timezone="auto"`: the browser timezone is auto-derived from the egress IP. By default (no explicit timezone) it ALWAYS resolves — from the proxy egress when a proxy is set, otherwise from the host's own public IP — so the zone can never disagree with the IP (the classic `timezone_mismatch` signal). An explicit `"Area/City"` is the only way to force a specific zone. On failure: with a proxy the launch raises (no silent host-TZ fallback behind a foreign proxy); without a proxy it falls back to the host TZ so a transient lookup can't break the launch. -- The egress IP is mapped to its IANA zone with an offline mmdb (`daijro/geoip-all-in-one`). 
It auto-updates against the upstream weekly rebuild: cached locally, re-checked after `GEOIP_REFRESH_DAYS` (7), older copies pruned, and a stale cache is reused when offline. `STEALTHFOX_GEOIP_MMDB` points at your own `.mmdb` to skip the download. -- `resolve_session_timezone(timezone, proxy)` and `ensure_geoip_mmdb()` re-exported at the package root (plus `GeoTimezoneError`) so integrations that own their launch can reproduce the resolution. -- `tests/test_geo.py` (37) + `tests/test_geoip_update.py` (freshness / auto-update / offline fallback) unit tests. - -### Changed -- New runtime dependencies: `requests[socks]` (SOCKS egress lookup), `maxminddb` (mmdb reader), `tzdata` (IANA database for `zoneinfo`, which Windows lacks). - -## [0.2.0] - 2026-05-28 - -### Added -- Public config helpers in `invisible_playwright.config`: `get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display)` returns a complete `firefox_user_prefs` dict; `get_default_args()` returns the baseline CLI args list (currently empty). Both also re-exported at the package root. -- `invisible_playwright.ensure_binary` re-exported at the package root for parity with the `cloakbrowser.download.ensure_binary` integration pattern that downstream projects (Skyvern, Crawlee, agno) already expect. -- These helpers let third-party fetchers (changedetection.io plugins, Crawlee `BrowserPool` subclasses, agno toolkits) drive `playwright.firefox.launch(executable_path=..., firefox_user_prefs=...)` themselves without depending on the `InvisiblePlaywright` context manager owning the lifecycle. -- `tests/unit/test_config_public.py`: 14 unit tests covering deterministic seed, locale / timezone / pin / extra_prefs / humanize variations, and round-trip via the public namespace. - -### Unchanged -- `InvisiblePlaywright` context manager surface is identical (backwards compatible). -- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build. 
- -## [0.1.8] - 2026-05-23 - -### Fixed -- [#20](https://github.com/feder-cr/invisible_playwright/issues/20): cross-origin iframes were unreachable from Playwright. `element_handle.content_frame()` returned `None`, `frame.evaluate()` threw cross-origin SOP errors, and `frame_locator(...).click()` timed out even with `force=True`. Root cause: FF150 defaults `fission.webContentIsolationStrategy=1` (`IsolateEverything`), which site-isolates every cross-origin iframe into a separate `webIsolated` content process even when `fission.autostart=False`. The parent's Juggler FrameTree then has a Frame placeholder with no docShell and no URL — every protocol op that needs to enter the iframe fails. Fix: pin `fission.webContentIsolationStrategy=0` (`IsolateNothing`) in the baseline prefs. The setting can be flipped back per session via `extra_prefs={"fission.webContentIsolationStrategy": 1}`. - -### Added -- `tests/test_cross_origin_iframe.py`: 4 unit + 5 e2e regression sentinels for cross-origin iframe interaction. The e2e layer runs entirely offline against two local HTTP servers on `127.0.0.1` (two ports = two SOP origins) and covers `page.frames` URL tracking, `content_frame()`, `frame.evaluate()`, `frame_locator(...).locator(...)`, and end-to-end `dispatch_event("click")` for plain, sandboxed and titled iframes. A future FF upgrade or fingerprint A/B that flips the pref back to `1` will fail the suite before shipping. - -### Unchanged -- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build was needed. - -## [0.1.7] - 2026-05-21 - -### Fixed -- [#18](https://github.com/feder-cr/invisible_playwright/issues/18): Tab crash when running with `headless=True` on Windows on pages that trigger cross-process navigation. 
Two separate bugs that only manifested together: (1) the Chromium content sandbox at default level 6 puts content processes on `kAlternateWinstation`, but the wrapper hides the browser window on its own alt-desktop (`CreateDesktop` for headless on Windows). Mismatched desktops → cross-process navigations couldn't reparent windows → content process exits cleanly and Playwright fires `page.on('crash')`. (2) The canvas2d `getImageData` stealth spoof wrote to a read-only mapped `DataSourceSurface`. On GPU-backed canvases that memory is write-protected → segfault during the final `getImageData` at page unload. Wrapper now sets `security.sandbox.content.level=4` in the alt-desktop workaround set, and `firefox-7` ships the source fix that moves the noise to the JS array's writable backing buffer. - -### Changed -- `BINARY_VERSION` bumped from `firefox-5` to `firefox-7`. `firefox-6` was rolled back when its partial fix turned out to be wrong (the iframe-burst hypothesis was a dead end; bisection in the evening found the real two-bug cause documented above). - ## [0.1.6] - 2026-05-21 ### Added @@ -73,7 +33,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [0.1.3] - 2026-05-19 ### Changed -- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible_firefox#stealth/150](https://github.com/feder-cr/invisible_firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`). +- `BINARY_VERSION` bumped from `firefox-2` to `firefox-3`. 
The new archives on both Windows and Linux are built from a clean clone of [feder-cr/invisible-firefox#stealth/150](https://github.com/feder-cr/invisible-firefox/tree/stealth/150) — the consolidated source-of-truth fork (renamed from `feder-cr/firefox`; the companion `feder-cr/firefox-stealth` patches repo was deleted, all patches now live as commits on top of `mozilla-firefox/firefox`). - The patched Firefox archive now ships the **proper C++ implementation** of `windowUtils.jugglerSendMouseEvent`, replacing the JS shim from 0.1.2. ### C++ fixes landed in this release @@ -84,7 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - **C7 (partial)**: storage stub for `nsIDocShell.languageOverride`. Workaround `InvisiblePlaywright(locale="")` recommended until full BC FIELD port lands. ### Verified -- Both archives built from same source: feder-cr/invisible_firefox commit `68906f1f9c55`. +- Both archives built from same source: feder-cr/invisible-firefox commit `68906f1f9c55`. - Windows + Linux smoke suite green: launch, `ctx.new_page()`, `page.mouse.{move,down,up,click,wheel}`, `navigator.webdriver=false`, sannysoft 32/33 PASS. - SHA256 published in `checksums.txt` on the `firefox-3` release. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8eb110d..b56e5d3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,7 +7,7 @@ Thanks for your interest in improving this project. Contributions are welcome vi - **Bug?** Open a [bug report](https://github.com/feder-cr/invisible_playwright/issues/new?template=bug_report.yml). - **Idea?** Open a [feature request](https://github.com/feder-cr/invisible_playwright/issues/new?template=feature_request.yml). - **Security issue?** Do **not** open a public issue — see [SECURITY.md](SECURITY.md). -- **The C++ patches** live in the companion repo [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there. 
+- **The C++ patches** live in the companion repo [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) (branch `stealth/150`). Bugs in fingerprint spoofing usually belong there. ## Scope @@ -18,7 +18,7 @@ This repository ships the **Python wrapper** (`invisible_playwright`) around a p - Binary download/caching, CLI, proxy plumbing - Tests, docs, examples, packaging -Out of scope (belongs in `invisible_firefox`): +Out of scope (belongs in `invisible-firefox`): - Changes to the Firefox C++ source - New preferences exposed by the patched binary @@ -65,7 +65,7 @@ Before opening, please: - Search [existing issues](https://github.com/feder-cr/invisible_playwright/issues) — the bug may already be tracked. - Reproduce on the **latest release** if possible. -- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible_firefox` instead. +- Confirm the issue is in the Python wrapper, not the patched Firefox itself. If a fingerprint is leaking or a detector flags the browser, open the issue at `feder-cr/invisible-firefox` instead. 
Include: diff --git a/README.md b/README.md index 0ef05d1..c95e4fa 100644 --- a/README.md +++ b/README.md @@ -6,26 +6,56 @@ [![Firefox 150.0.1](https://img.shields.io/badge/firefox-150.0.1-orange.svg)](https://www.mozilla.org/firefox/) [![GitHub release](https://img.shields.io/github/v/release/feder-cr/invisible_playwright.svg)](https://github.com/feder-cr/invisible_playwright/releases) [![GitHub stars](https://img.shields.io/github/stars/feder-cr/invisible_playwright.svg?style=social)](https://github.com/feder-cr/invisible_playwright/stargazers) -[![browser launches](https://img.shields.io/github/downloads/feder-cr/invisible_firefox/usage-counter/total?label=browser%20launches&color=blue)](https://github.com/feder-cr/invisible_firefox/releases/tag/usage-counter) [![LinkedIn](https://img.shields.io/badge/LinkedIn-Federico%20Elia-0A66C2?logo=linkedin&logoColor=white)](https://it.linkedin.com/in/federico-elia-5199951b6) -**Stealth Firefox that passes every bot detection test. Drop-in Playwright replacement, fingerprint patched at the C++ level, not a JavaScript shim.** +A patched Firefox **100% Playwright-compatible** that passes the hardest browser-fingerprint detectors in the wild. -![invisible_playwright - 5/5 detection suites passed](docs/screenshots/hero.gif) +## Results + +### Google reCAPTCHA v3 - **0.90 / 1.0** + +Top-tier score. Google classifies the session as "very likely a human". Most anti-detect stacks plateau around 0.3-0.7. + +![reCAPTCHA score 0.90](docs/screenshots/recaptcha_score.png) + +### Fingerprint Pro - **bot: not detected, VPN: false, tampering: false, dev tools: not detected** + +FingerprintJS Pro's full Smart Signals battery flips every flag to "Not detected". Browser correctly identified as Firefox 150 on Windows 10. Confidence score 0.9. 
+ +![FingerprintPro not detected](docs/screenshots/fingerprintpro.png) + +### CreepJS - **0 lies**, fingerprint is internally coherent + +No contradictions between headless hints, spoofed values, and real rendering output. That "0 lies" is what kills most anti-detect browsers: one inconsistency (e.g. Chrome UA + Firefox WebGL) and the trust score collapses. + +![CreepJS 0 lies](docs/screenshots/creepjs.png) + +### BrowserLeaks WebRTC - **no public IP leak** + +WebRTC srflx address is the proxy egress IP; host candidates are private LAN. The real public IP never leaks via STUN, even on pages that configure their own ICE servers. Stock Firefox exposes an mDNS hostname (e.g. `abc-1234.local`) as a host ICE candidate, which is itself a stable per-session signal detectors fingerprint. invisible_playwright replaces host candidates with synthetic private-LAN IPs that match the spoofed network, removing the mDNS tell. + +![WebRTC no leaks](docs/screenshots/webrtc.png) + +### bot.sannysoft.com - **all checks pass** + +Every row green: WebDriver not present, Chrome-only properties absent, plugin/mime/languages arrays coherent, permissions API correct, iframe/source window checks pass. + +![Sannysoft all green](docs/screenshots/sannysoft.png) + +--- ## Why it's powerful - -**Most other anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems: +**Most anti-detect browsers patch Chromium at the JavaScript level** - they override `navigator`, `WebGLRenderingContext.getParameter`, canvas APIs, and so on via injected scripts. This has two fatal problems: 1. **JS patches are detectable.** Anti-bots enumerate native function `.toString()`, check descriptor configurability, compare property enumeration order, watch for prototype mutations. Every patch leaves a fingerprint of its own. 
CreepJS has an entire battery of "lies detectors" built around this. 2. **Chromium itself is now suspect.** Residential-proxy bot traffic is overwhelmingly Chromium-based, so detectors weight anything Chromium-shaped as risky by default. Chromium-based forks inherit Chrome's open-source layers (BoringSSL, Blink, V8, ANGLE) cleanly, but they still cannot fully match Chrome in practice: Chrome ships closed-source components on top (Widevine, proprietary codecs, Google Update / Safe Browsing endpoints) that flip detectable JS feature flags and network signals, and forks lag Chrome's release cadence by days to weeks, leaving telltale version-specific behaviours that detectors lock onto. **invisible_playwright patches Firefox at the C++ level.** The spoofed values come back out through the normal Gecko paths - there is no JS shim, no override, no `Object.defineProperty`. **From the page's point of view, the browser is just telling the truth.** Anti-bot lie-detectors have nothing to latch onto. -invisible_playwright spoofs **all the layers that matter, together, coherently**: Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) for the full per-layer breakdown of which C++ files are patched and why. +invisible_playwright spoofs **all the layers that matter, together, coherently** — Navigator, screen, GPU/WebGL, Canvas, fonts, audio, WebRTC, timezone, DevTools detection, SOCKS5 auth, and the rest. See [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox) for the full per-layer breakdown of which C++ files are patched and why. Everything is driven by preferences - no hardcoded values in the binary. You change one pref, you change the spoofed value. @@ -33,21 +63,23 @@ Everything is driven by preferences - no hardcoded values in the binary. 
You cha ## How it compares -**CloakBrowser** ships a similar pitch for Chromium, but its binary is **closed source** (the source-level patches are not published, you only get the compiled output), and it still hits the Chromium reCAPTCHA ceiling. The commercial anti-detect browsers (**Multilogin**, **GoLogin**, AdsPower, Dolphin, Kameleo) are paid SaaS that overlay JS-layer spoofing on a patched Chromium. Managed profiles are nice but raw detection bypass sits below both Camoufox and us. +Commercial anti-detect browsers (Multilogin Mimic, GoLogin Orbita, AdsPower, Dolphin Anty) ship patched Chromium and apply most spoofing at the JavaScript layer. A few (Kameleo, Multilogin Stealthfox) also offer Firefox-based profiles, but the spoofing pattern is the same: runtime overrides on top of an unmodified rendering engine. That's the ceiling - and it's a low one. -| | invisible_playwright | Camoufox | CloakBrowser | Multilogin | +| | invisible_playwright | Multilogin / GoLogin | AdsPower / Dolphin | Kameleo | |---|---|---|---|---| -| Engine | Firefox 150 | Firefox (~1 year old base) | Chromium | Chromium fork | -| Patch depth | C++ source | C++ source | C++ source (binary only) | JS overrides | -| Maintenance | Active (weekly) | Gap (~1 year) | Active | Active SaaS | -| Open source | ✅ MIT | ✅ MPL | ❌ Closed source | ❌ Closed source | -| `.toString()` clean | ✅ | ✅ | ✅ | ❌ Detectable shims | -| Canvas / WebGL / Audio | ✅ C++ | ⚠️ Drift vs current FF | ✅ C++ | ⚠️ JS override | -| SOCKS5 auth | ✅ Patched | ❌ | ⚠️ Playwright proxy | ⚠️ Varies | -| **reCAPTCHA v3 score** | **0.90** | ~0.3-0.5 | ~0.3-0.5 | ~0.3-0.6 | -| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected | -| CreepJS lies | ✅ 0 | ❌ Multiple | ✅ 0 | ❌ Multiple | -| Cost | Free | Free | Free | From $99/mo | +| Engine | Firefox (open source) | Chromium fork | Chromium fork | Chromium | +| Patch depth | C++ source | JS overrides | JS overrides | JS overrides | +| `.toString()` clean | ✅ 
Native Gecko path | ❌ Detectable shims | ❌ Detectable shims | ❌ Detectable shims | +| Canvas / WebGL | ✅ C++ level | ⚠️ JS override | ⚠️ JS override | ⚠️ JS override | +| SOCKS5 auth | ✅ Patched | ⚠️ Varies | ⚠️ Varies | ❌ | +| Self-hosted | ✅ | ❌ SaaS | ❌ SaaS | ❌ Cloud | +| reCAPTCHA v3 score | **0.90** | ~0.3-0.6 | ~0.3-0.5 | ~0.3-0.5 | +| FP Pro - bot detected | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected | +| FP Pro - tampering | ✅ Not detected | ❌ Detected | ❌ Detected | ❌ Detected | +| FP Pro - VPN flag | ✅ false | ❌ true | ❌ true | ❌ true | +| CreepJS lies | ✅ 0 | ❌ multiple | ❌ multiple | ❌ multiple | + +Competitor scores reflect our own testing on Windows 10 against the same five detection suites used above; results may vary with their evolving builds. --- @@ -140,21 +172,6 @@ with InvisiblePlaywright(proxy=proxy) as browser: Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak. -### Timezone - -The browser timezone follows `timezone=`: - -```python -# default: timezone is auto-derived from the egress IP (proxy egress if a -# proxy is set, otherwise the host's own public IP) -with InvisiblePlaywright(proxy=proxy) as browser: - ... - -# explicit IANA zone always wins — the only way to force a specific zone -with InvisiblePlaywright(proxy=proxy, timezone="America/New_York") as browser: - ... -``` - ### Pinning specific fingerprint fields By default everything comes from `seed`. 
To force specific values while the rest stays seed-derived: @@ -186,6 +203,24 @@ invisible_playwright version # wrapper and binary versions invisible_playwright clear-cache # remove all cached binaries ``` +## Known issues + +### `headless=True` on Windows can cause tab crashes on sites with heavy cross-process navigation + +Reported as [#18](https://github.com/feder-cr/invisible_playwright/issues/18) (`id.sky.com` and similar). On Windows, `headless=True` runs Firefox headed on a hidden alt-desktop created via `CreateDesktop`. Some sites (id.sky.com, anything else loading Adobe AppMeasurement in a way that triggers cross-process navigation) end up firing `page.on('crash')` after about 10 seconds. The cause is a window-parenting interaction between the alt-desktop and the GPU/content processes; the workaround is one of: + +```python +# Option A — keep the visible window (no alt-desktop) +with InvisiblePlaywright(seed=42, headless=False) as browser: + ... + +# Option B — run inside Xvfb on Linux (alt-desktop bug is Windows-only) +``` + +The visible window case works on every site we've tested. Linux + Xvfb is unaffected. + +--- + ## Related projects invisible_playwright takes a different angle than the major Firefox-hardening projects but stands on their shoulders: @@ -198,4 +233,4 @@ invisible_playwright takes a different angle than the major Firefox-hardening pr ## License -MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox). +MIT - see [LICENSE](LICENSE). The patched Firefox binary is distributed under the MPL-2.0 (Firefox upstream license). The C++ patches against mozilla-central that produce that binary are at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox). 
diff --git a/SECURITY.md b/SECURITY.md index 83959a2..19dbc11 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -41,7 +41,7 @@ In scope: Out of scope here (report to the relevant project): -- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox/security/advisories/new) +- Vulnerabilities in the patched Firefox C++ source — open a private report at [feder-cr/invisible-firefox](https://github.com/feder-cr/invisible-firefox/security/advisories/new) - Vulnerabilities in upstream Firefox / mozilla-central — report to Mozilla per https://www.mozilla.org/security/ - Vulnerabilities in third-party dependencies (`playwright`, `requests`, etc.) — report to those projects directly diff --git a/docs/screenshots/hero.gif b/docs/screenshots/hero.gif deleted file mode 100644 index eadbf1b..0000000 Binary files a/docs/screenshots/hero.gif and /dev/null differ diff --git a/pyproject.toml b/pyproject.toml index d08f552..7793173 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "invisible-playwright" -version = "0.2.0" +version = "0.1.6" description = "Playwright wrapper for a patched Firefox with deterministic stealth profile." 
readme = "README.md" requires-python = ">=3.11" @@ -22,9 +22,7 @@ classifiers = [ dependencies = [ "playwright>=1.40", "platformdirs>=4", - "requests[socks]>=2.31", - "maxminddb>=2.2", - "tzdata>=2024.1", + "requests>=2.31", "tqdm>=4.66", "pywin32>=306; sys_platform == 'win32'", ] diff --git a/src/invisible_playwright/__init__.py b/src/invisible_playwright/__init__.py index 0871021..6bae9f3 100644 --- a/src/invisible_playwright/__init__.py +++ b/src/invisible_playwright/__init__.py @@ -15,30 +15,8 @@ Quickstart: page = browser.new_page() page.click("#submit") # expanded into a Bezier trajectory """ -from .config import get_default_args, get_default_stealth_prefs -from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION -from ._geo import GeoTimezoneError, resolve_session_timezone -from .download import ensure_binary, ensure_geoip_mmdb from .launcher import InvisiblePlaywright +from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION -from importlib.metadata import PackageNotFoundError, version as _pkg_version - -try: - __version__ = _pkg_version("invisible-playwright") -except PackageNotFoundError: - # Editable / source checkout without an install record: fall back to a - # marker rather than risk shipping a stale hardcoded string. 
- __version__ = "0.0.0+unknown" - -__all__ = [ - "InvisiblePlaywright", - "ensure_binary", - "ensure_geoip_mmdb", - "get_default_stealth_prefs", - "get_default_args", - "resolve_session_timezone", - "GeoTimezoneError", - "BINARY_VERSION", - "FIREFOX_UPSTREAM_VERSION", - "__version__", -] +__version__ = "0.1.0" +__all__ = ["InvisiblePlaywright", "BINARY_VERSION", "FIREFOX_UPSTREAM_VERSION", "__version__"] diff --git a/src/invisible_playwright/_fpforge/_sampler.py b/src/invisible_playwright/_fpforge/_sampler.py index 692f600..5653db8 100644 --- a/src/invisible_playwright/_fpforge/_sampler.py +++ b/src/invisible_playwright/_fpforge/_sampler.py @@ -84,12 +84,6 @@ _FONT_POOL = _load("font_pool.json") _FONT_CORE: list = _FONT_POOL["core"] _FONT_OPTIONAL: list = _FONT_POOL["optional"] _CPT_FONTS_OPT = _load("cpt_fonts_optional_given_class.json")["table"] -# Browsing-history pool + CPT (per-class probabilities for visited sites). -# Drives _recaptcha_seed's cookie pre-seed: each persona ends up with a -# coherent list of ~15-30 visited sites whose categories correlate with -# gpu_class (workstation → dev-heavy, integrated_old → shop+news-heavy). -_BROWSING_POOL: list = _load("browsing_pool.json")["entries"] -_CPT_BROWSING = _load("cpt_browsing_given_class.json")["table"] # ═══════════════════════════════════════════════════════════════════════ @@ -288,33 +282,6 @@ def derive_font_whitelist(gpu_class: str, rng) -> str: return derive_font_prefs(gpu_class, rng)["whitelist"] -# ═══════════════════════════════════════════════════════════════════════ -# BROWSING HISTORY (Bayesian: per-site P(visited|gpu_class)) -# ═══════════════════════════════════════════════════════════════════════ -def derive_browsing_history(gpu_class: str, rng) -> list: - """Sample which sites this persona has visited recently. - - Each site in the pool has a per-class probability (CPT). 
We sample - independently per-site, producing a list of dicts: - [{"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, ...] - - Sum of CPT probabilities per class is tuned to land ~15-30 visited sites - on average — an established-user signature. Sorted by name for stable - output across runs of the same seed. - """ - cpt = _CPT_BROWSING.get(gpu_class) - if cpt is None: - cpt = _CPT_BROWSING["mid_range"] - visited: list = [] - for entry in _BROWSING_POOL: - name = entry["name"] - p = cpt.get(name, 0.3) # default 0.3 for missing CPT row - if rng.random() < p: - visited.append(dict(entry)) # copy to avoid mutating pool - visited.sort(key=lambda e: e["name"]) - return visited - - # ═══════════════════════════════════════════════════════════════════════ # PUBLIC API: Forge # ═══════════════════════════════════════════════════════════════════════ @@ -383,12 +350,6 @@ class Forge: bundle["gpu_class"], self._rng ).items() }, - # Bayesian browsing history (per-class P(visited|gpu_class)). - # Consumed by _recaptcha_seed.py to seed coherent cookie history - # when invisible_playwright is launched with prep_recaptcha=True. 
- "browsing_history": derive_browsing_history( - bundle["gpu_class"], self._rng - ), } diff --git a/src/invisible_playwright/_fpforge/data/browsing_pool.json b/src/invisible_playwright/_fpforge/data/browsing_pool.json deleted file mode 100644 index 6e98cd9..0000000 --- a/src/invisible_playwright/_fpforge/data/browsing_pool.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "_comment": [ - "Pool of everyday websites used by the browsing_history node.", - "Each entry: { name, category, cookie_profile }.", - "- name: bare domain (no scheme, no leading dot).", - "- category: dev / shop / news / reference / media / community / misc.", - "- cookie_profile: short tag pointing to a cookie-template recipe used by", - " _recaptcha_seed.py to generate concrete cookies (so heavy-analytics sites", - " get _ga+_gid+OneTrust, simple sites get just _ga, dev tools get GH-style).", - "Add new entries here + add per-class probabilities in cpt_browsing_given_class.json." - ], - "entries": [ - {"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"}, - {"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"}, - {"name": "mozilla.org", "category": "reference", "cookie_profile": "ga_consent"}, - {"name": "w3schools.com", "category": "dev", "cookie_profile": "ga_consent_clarity"}, - {"name": "mdn.io", "category": "dev", "cookie_profile": "minimal"}, - {"name": "duckduckgo.com", "category": "reference", "cookie_profile": "minimal"}, - {"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, - {"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"}, - {"name": "npmjs.com", "category": "dev", "cookie_profile": "ga_consent"}, - {"name": "gitlab.com", "category": "dev", "cookie_profile": "ga_cf"}, - {"name": "pypi.org", "category": "dev", "cookie_profile": "minimal"}, - {"name": "docs.python.org", "category": "dev", "cookie_profile": "minimal"}, - {"name": "rust-lang.org", "category": "dev", "cookie_profile": 
"ga_consent"}, - {"name": "go.dev", "category": "dev", "cookie_profile": "ga_consent"}, - {"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "ebay.com", "category": "shop", "cookie_profile": "ga_consent"}, - {"name": "etsy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "bestbuy.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "target.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "nytimes.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, - {"name": "cnn.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "bbc.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "theguardian.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, - {"name": "reuters.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "apnews.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "washingtonpost.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "techcrunch.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, - {"name": "theverge.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "arstechnica.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "wired.com", "category": "news", "cookie_profile": "ga_consent_clarity"}, - {"name": "engadget.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "9to5mac.com", "category": "news", "cookie_profile": "ga_consent"}, - {"name": "medium.com", "category": "community", "cookie_profile": "ga_consent"}, - {"name": "dev.to", "category": "community", "cookie_profile": "ga_consent"}, - {"name": "reddit.com", "category": "community", "cookie_profile": "ga_cf"}, - {"name": "news.ycombinator.com", "category": "community", "cookie_profile": "minimal"}, - {"name": "quora.com", "category": "community", "cookie_profile": "ga_consent_clarity"}, - 
{"name": "stackexchange.com", "category": "community", "cookie_profile": "ga_consent_clarity"}, - {"name": "imdb.com", "category": "media", "cookie_profile": "ga_consent_clarity"}, - {"name": "rottentomatoes.com", "category": "media", "cookie_profile": "ga_consent"}, - {"name": "metacritic.com", "category": "media", "cookie_profile": "ga_consent"}, - {"name": "allrecipes.com", "category": "misc", "cookie_profile": "ga_consent_clarity"}, - {"name": "epicurious.com", "category": "misc", "cookie_profile": "ga_consent"}, - {"name": "tripadvisor.com", "category": "misc", "cookie_profile": "ga_consent_clarity"}, - {"name": "weather.com", "category": "reference", "cookie_profile": "ga_consent"}, - {"name": "timeanddate.com", "category": "reference", "cookie_profile": "ga_consent"}, - {"name": "thesaurus.com", "category": "reference", "cookie_profile": "ga_consent_clarity"}, - {"name": "kayak.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "booking.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "airbnb.com", "category": "shop", "cookie_profile": "ga_consent"} - ] -} diff --git a/src/invisible_playwright/_fpforge/data/cpt_browsing_given_class.json b/src/invisible_playwright/_fpforge/data/cpt_browsing_given_class.json deleted file mode 100644 index b2e3b1a..0000000 --- a/src/invisible_playwright/_fpforge/data/cpt_browsing_given_class.json +++ /dev/null @@ -1,138 +0,0 @@ -{ - "_comment": [ - "Per-class probability that a persona of a given gpu_class has visited each", - "site in the pool. Used by the browsing_history node to derive a coherent", - "visited-domain list per persona.", - "", - "Probabilities are tuned so each class samples ~15-30 sites on average", - "(sum across all 50 entries falls in that range), giving an established-user", - "look. 
Categories are biased by class:", - " - workstation/high_end: higher P(dev) + high P(news/media)", - " - mid_range: balanced", - " - low_end/integrated_*: lower P(dev), higher P(shop/news/reference)", - "", - "Missing class falls back to mid_range via Node CPT pool fallback." - ], - "table": { - "workstation": { - "youtube.com": 0.80, "wikipedia.org": 0.85, "mozilla.org": 0.70, - "w3schools.com": 0.40, "mdn.io": 0.55, "duckduckgo.com": 0.45, - "github.com": 0.95, "stackoverflow.com": 0.90, "npmjs.com": 0.65, - "gitlab.com": 0.50, "pypi.org": 0.55, "docs.python.org": 0.60, - "rust-lang.org": 0.35, "go.dev": 0.30, - "amazon.com": 0.70, "ebay.com": 0.25, "etsy.com": 0.15, - "bestbuy.com": 0.45, "target.com": 0.30, - "nytimes.com": 0.55, "cnn.com": 0.40, "bbc.com": 0.55, - "theguardian.com": 0.45, "reuters.com": 0.40, "apnews.com": 0.30, - "washingtonpost.com": 0.40, - "techcrunch.com": 0.65, "theverge.com": 0.60, "arstechnica.com": 0.65, - "wired.com": 0.50, "engadget.com": 0.35, "9to5mac.com": 0.30, - "medium.com": 0.55, "dev.to": 0.40, "reddit.com": 0.70, - "news.ycombinator.com": 0.65, "quora.com": 0.20, "stackexchange.com": 0.60, - "imdb.com": 0.45, "rottentomatoes.com": 0.25, "metacritic.com": 0.20, - "allrecipes.com": 0.20, "epicurious.com": 0.15, "tripadvisor.com": 0.30, - "weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25, - "kayak.com": 0.30, "booking.com": 0.35, "airbnb.com": 0.30 - }, - "high_end": { - "youtube.com": 0.85, "wikipedia.org": 0.80, "mozilla.org": 0.60, - "w3schools.com": 0.45, "mdn.io": 0.45, "duckduckgo.com": 0.40, - "github.com": 0.85, "stackoverflow.com": 0.80, "npmjs.com": 0.50, - "gitlab.com": 0.40, "pypi.org": 0.45, "docs.python.org": 0.50, - "rust-lang.org": 0.30, "go.dev": 0.25, - "amazon.com": 0.75, "ebay.com": 0.30, "etsy.com": 0.20, - "bestbuy.com": 0.50, "target.com": 0.35, - "nytimes.com": 0.50, "cnn.com": 0.50, "bbc.com": 0.50, - "theguardian.com": 0.40, "reuters.com": 0.35, "apnews.com": 0.30, - 
"washingtonpost.com": 0.35, - "techcrunch.com": 0.60, "theverge.com": 0.65, "arstechnica.com": 0.60, - "wired.com": 0.50, "engadget.com": 0.40, "9to5mac.com": 0.35, - "medium.com": 0.50, "dev.to": 0.35, "reddit.com": 0.75, - "news.ycombinator.com": 0.55, "quora.com": 0.25, "stackexchange.com": 0.55, - "imdb.com": 0.55, "rottentomatoes.com": 0.35, "metacritic.com": 0.30, - "allrecipes.com": 0.25, "epicurious.com": 0.20, "tripadvisor.com": 0.30, - "weather.com": 0.55, "timeanddate.com": 0.30, "thesaurus.com": 0.25, - "kayak.com": 0.30, "booking.com": 0.40, "airbnb.com": 0.30 - }, - "mid_range": { - "youtube.com": 0.85, "wikipedia.org": 0.75, "mozilla.org": 0.45, - "w3schools.com": 0.40, "mdn.io": 0.30, "duckduckgo.com": 0.35, - "github.com": 0.55, "stackoverflow.com": 0.55, "npmjs.com": 0.30, - "gitlab.com": 0.25, "pypi.org": 0.25, "docs.python.org": 0.30, - "rust-lang.org": 0.15, "go.dev": 0.15, - "amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30, - "bestbuy.com": 0.55, "target.com": 0.40, - "nytimes.com": 0.45, "cnn.com": 0.55, "bbc.com": 0.45, - "theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30, - "washingtonpost.com": 0.30, - "techcrunch.com": 0.45, "theverge.com": 0.50, "arstechnica.com": 0.40, - "wired.com": 0.45, "engadget.com": 0.35, "9to5mac.com": 0.30, - "medium.com": 0.45, "dev.to": 0.25, "reddit.com": 0.70, - "news.ycombinator.com": 0.30, "quora.com": 0.35, "stackexchange.com": 0.40, - "imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.35, - "allrecipes.com": 0.35, "epicurious.com": 0.25, "tripadvisor.com": 0.40, - "weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30, - "kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40 - }, - "low_end": { - "youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.35, - "w3schools.com": 0.30, "mdn.io": 0.20, "duckduckgo.com": 0.30, - "github.com": 0.30, "stackoverflow.com": 0.30, "npmjs.com": 0.15, - "gitlab.com": 0.10, "pypi.org": 0.10, "docs.python.org": 
0.15, - "rust-lang.org": 0.05, "go.dev": 0.05, - "amazon.com": 0.85, "ebay.com": 0.50, "etsy.com": 0.40, - "bestbuy.com": 0.55, "target.com": 0.45, - "nytimes.com": 0.40, "cnn.com": 0.60, "bbc.com": 0.40, - "theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.30, - "washingtonpost.com": 0.25, - "techcrunch.com": 0.30, "theverge.com": 0.35, "arstechnica.com": 0.25, - "wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25, - "medium.com": 0.35, "dev.to": 0.15, "reddit.com": 0.65, - "news.ycombinator.com": 0.15, "quora.com": 0.45, "stackexchange.com": 0.25, - "imdb.com": 0.65, "rottentomatoes.com": 0.45, "metacritic.com": 0.35, - "allrecipes.com": 0.45, "epicurious.com": 0.30, "tripadvisor.com": 0.45, - "weather.com": 0.65, "timeanddate.com": 0.25, "thesaurus.com": 0.35, - "kayak.com": 0.35, "booking.com": 0.50, "airbnb.com": 0.40 - }, - "integrated_modern": { - "youtube.com": 0.85, "wikipedia.org": 0.70, "mozilla.org": 0.40, - "w3schools.com": 0.35, "mdn.io": 0.25, "duckduckgo.com": 0.35, - "github.com": 0.40, "stackoverflow.com": 0.40, "npmjs.com": 0.20, - "gitlab.com": 0.15, "pypi.org": 0.20, "docs.python.org": 0.20, - "rust-lang.org": 0.10, "go.dev": 0.10, - "amazon.com": 0.80, "ebay.com": 0.40, "etsy.com": 0.30, - "bestbuy.com": 0.50, "target.com": 0.40, - "nytimes.com": 0.40, "cnn.com": 0.55, "bbc.com": 0.45, - "theguardian.com": 0.35, "reuters.com": 0.30, "apnews.com": 0.30, - "washingtonpost.com": 0.30, - "techcrunch.com": 0.40, "theverge.com": 0.45, "arstechnica.com": 0.30, - "wired.com": 0.40, "engadget.com": 0.30, "9to5mac.com": 0.25, - "medium.com": 0.40, "dev.to": 0.20, "reddit.com": 0.65, - "news.ycombinator.com": 0.25, "quora.com": 0.40, "stackexchange.com": 0.35, - "imdb.com": 0.60, "rottentomatoes.com": 0.40, "metacritic.com": 0.30, - "allrecipes.com": 0.40, "epicurious.com": 0.25, "tripadvisor.com": 0.40, - "weather.com": 0.60, "timeanddate.com": 0.25, "thesaurus.com": 0.30, - "kayak.com": 0.35, "booking.com": 0.45, "airbnb.com": 0.40 
- }, - "integrated_old": { - "youtube.com": 0.75, "wikipedia.org": 0.65, "mozilla.org": 0.30, - "w3schools.com": 0.20, "mdn.io": 0.10, "duckduckgo.com": 0.25, - "github.com": 0.15, "stackoverflow.com": 0.20, "npmjs.com": 0.05, - "gitlab.com": 0.05, "pypi.org": 0.05, "docs.python.org": 0.10, - "rust-lang.org": 0.02, "go.dev": 0.02, - "amazon.com": 0.85, "ebay.com": 0.55, "etsy.com": 0.45, - "bestbuy.com": 0.55, "target.com": 0.50, - "nytimes.com": 0.45, "cnn.com": 0.65, "bbc.com": 0.40, - "theguardian.com": 0.30, "reuters.com": 0.25, "apnews.com": 0.35, - "washingtonpost.com": 0.30, - "techcrunch.com": 0.20, "theverge.com": 0.25, "arstechnica.com": 0.15, - "wired.com": 0.30, "engadget.com": 0.20, "9to5mac.com": 0.20, - "medium.com": 0.30, "dev.to": 0.05, "reddit.com": 0.55, - "news.ycombinator.com": 0.05, "quora.com": 0.55, "stackexchange.com": 0.15, - "imdb.com": 0.70, "rottentomatoes.com": 0.50, "metacritic.com": 0.35, - "allrecipes.com": 0.55, "epicurious.com": 0.35, "tripadvisor.com": 0.50, - "weather.com": 0.70, "timeanddate.com": 0.30, "thesaurus.com": 0.40, - "kayak.com": 0.40, "booking.com": 0.55, "airbnb.com": 0.40 - } - } -} diff --git a/src/invisible_playwright/_fpforge/profile.py b/src/invisible_playwright/_fpforge/profile.py index fcdf024..16c52a4 100644 --- a/src/invisible_playwright/_fpforge/profile.py +++ b/src/invisible_playwright/_fpforge/profile.py @@ -120,11 +120,6 @@ class Profile: webgl: WebGLProfile fonts: List[str] dark_theme: bool - # Bayesian browsing-history: list of {name, category, cookie_profile} - # dicts sampled from data/browsing_pool.json with per-class CPT. Used - # by _recaptcha_seed.py to build a coherent cookie pre-seed when the - # caller opts in via Stealthfox(prep_recaptcha=True). 
- browsing_history: List[Dict[str, str]] = field(default_factory=list) _raw: Dict[str, Any] = field(default_factory=dict, repr=False, compare=False) def to_prefs_dict(self) -> Dict[str, Any]: @@ -260,6 +255,5 @@ def generate_profile(seed: int, pin: Optional[Dict[str, Any]] = None) -> Profile webgl=WebGLProfile(msaa_samples=int(raw["msaa_samples"])), fonts=fonts, dark_theme=bool(raw["dark_theme"]), - browsing_history=list(raw.get("browsing_history") or []), _raw=raw, ) diff --git a/src/invisible_playwright/_geo.py b/src/invisible_playwright/_geo.py deleted file mode 100644 index 02971e1..0000000 --- a/src/invisible_playwright/_geo.py +++ /dev/null @@ -1,164 +0,0 @@ -"""Resolve the session timezone from the egress IP (``timezone="auto"``). - -Approach B: discover the egress IP with one HTTP request — routed *through the -proxy* when one is set, otherwise a direct request that sees the host's own -public IP — then map IP → IANA timezone with an offline mmdb -(``daijro/geoip-all-in-one``, downloaded + cached by ``download.py``). - -Precedence (see ``resolve_session_timezone``): - - explicit IANA → unchanged explicit always wins - "" / "auto" → egress ALWAYS resolve. With a proxy, from the proxy - egress IP; without a proxy, from the host's - own public IP. This is the default. - -On failure: - with a proxy → raise a foreign proxy paired with the host TZ is - the precise ``timezone_mismatch`` signal, so - we fail loudly rather than fall back silently. - without a proxy → "" (host) the host TZ is a safe default, so a transient - lookup failure must not break the launch. -""" -from __future__ import annotations - -import ipaddress -from typing import Any, Dict, Optional -from urllib.parse import quote - -import requests - - -class GeoTimezoneError(RuntimeError): - """Raised when ``timezone="auto"`` cannot resolve a valid IANA zone.""" - - -# Plain-text IP echo endpoints (each returns just the caller's public IP). 
-_IP_ECHO_ENDPOINTS = ( - "https://api.ipify.org", - "https://icanhazip.com", - "https://checkip.amazonaws.com", -) - -_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://") - - -def _proxy_is_set(proxy: Optional[Dict[str, str]]) -> bool: - if not proxy: - return False - server = (proxy.get("server") or "").strip() - return bool(server) and server.lower() != "direct://" - - -def _proxies_for_requests(proxy: Dict[str, str]) -> Dict[str, str]: - """Translate our proxy dict into a ``requests`` proxies mapping. - - SOCKS5 uses the ``socks5h`` scheme so DNS is resolved proxy-side (matches - ``network.proxy.socks_remote_dns=True`` in the Firefox path). HTTP/HTTPS - pass through unchanged. Credentials are URL-encoded. - """ - server = (proxy.get("server") or "").strip() - low = server.lower() - if low.startswith("socks5://") or low.startswith("socks://"): - scheme = "socks5h" - elif low.startswith("socks4://"): - scheme = "socks4" - elif low.startswith("https://"): - scheme = "https" - else: - scheme = "http" - - host_port = server.split("://", 1)[1] if "://" in server else server - user = proxy.get("username") or "" - pwd = proxy.get("password") or "" - if user: - auth = f"{quote(user, safe='')}:{quote(pwd, safe='')}@" - else: - auth = "" - url = f"{scheme}://{auth}{host_port}" - return {"http": url, "https": url} - - -def discover_egress_ip( - proxy: Optional[Dict[str, str]] = None, *, timeout: float = 10.0 -) -> str: - """Return the public egress IP. - - Routes the request through ``proxy`` when given (SOCKS support requires - ``requests[socks]`` / PySocks); with ``proxy=None`` it makes a direct - request that sees the host's own public IP. Tries each echo endpoint in - turn; raises :class:`GeoTimezoneError` if none return a valid IP. 
- """ - proxies = _proxies_for_requests(proxy) if proxy else None - last_err: Optional[Exception] = None - for url in _IP_ECHO_ENDPOINTS: - try: - resp = requests.get(url, proxies=proxies, timeout=timeout) - resp.raise_for_status() - ip = resp.text.strip() - ipaddress.ip_address(ip) # validate (raises ValueError if not an IP) - return ip - except Exception as exc: # noqa: BLE001 - try the next endpoint - last_err = exc - continue - raise GeoTimezoneError( - f"could not discover the proxy egress IP via {len(_IP_ECHO_ENDPOINTS)} " - f"endpoints (last error: {last_err!r}). For SOCKS proxies make sure " - f"requests[socks] / PySocks is installed." - ) - - -def ip_to_timezone(ip: str, mmdb_path: Any) -> str: - """Map ``ip`` to its IANA timezone using the offline mmdb. - - Reads the standard MaxMind ``location.time_zone`` field and validates it - against the system tz database. Raises :class:`GeoTimezoneError` if the IP - is absent from the DB or the zone is missing / not a valid IANA name. - """ - import maxminddb - - with maxminddb.open_database(str(mmdb_path)) as reader: - record = reader.get(ip) - if not record: - raise GeoTimezoneError(f"egress IP {ip} not present in the geoip database") - tz = ((record.get("location") or {}) if isinstance(record, dict) else {}).get( - "time_zone" - ) - if not tz: - raise GeoTimezoneError(f"no timezone for egress IP {ip} in the geoip database") - from zoneinfo import ZoneInfo, ZoneInfoNotFoundError - - try: - ZoneInfo(tz) - except (ZoneInfoNotFoundError, ValueError) as exc: - raise GeoTimezoneError( - f"geoip returned an invalid IANA zone {tz!r} for {ip}: {exc}" - ) from exc - return tz - - -def resolve_session_timezone( - timezone: str, proxy: Optional[Dict[str, str]] -) -> str: - """Map the user's ``timezone`` setting to a concrete IANA zone (or ``""``). - - See the module docstring for the full precedence table. 
``""``/``"auto"`` - ALWAYS resolve from the egress IP (proxy egress if a proxy is set, else the - host's own public IP). On failure: with a proxy we raise - :class:`GeoTimezoneError` (never silently use the host TZ behind a foreign - proxy); without a proxy we fall back to ``""`` (host TZ) so a transient - lookup failure can't break the launch. - """ - tz = (timezone or "").strip() - if tz and tz.lower() != "auto": - return tz # explicit IANA wins - # "" or "auto" → always resolve from the egress IP. - from .download import ensure_geoip_mmdb - - proxy_set = _proxy_is_set(proxy) - try: - ip = discover_egress_ip(proxy if proxy_set else None) - return ip_to_timezone(ip, ensure_geoip_mmdb()) - except Exception: - if proxy_set: - raise # fail-early behind a proxy (timezone_mismatch trap) - return "" # no proxy: host TZ is a safe fallback diff --git a/src/invisible_playwright/_recaptcha_seed.py b/src/invisible_playwright/_recaptcha_seed.py deleted file mode 100644 index cd998a2..0000000 --- a/src/invisible_playwright/_recaptcha_seed.py +++ /dev/null @@ -1,340 +0,0 @@ -"""Deterministic reCAPTCHA cookie pre-seed. - -Consumes the Bayesian-sampled `browsing_history` from the persona Profile -(see `_fpforge/_sampler.py:derive_browsing_history`). For each visited -site, builds 1-5 realistic cookies whose composition is chosen by the -site's `cookie_profile` tag (analytics-only / consent / cloudflare-bot- -management / etc.). All values seeded deterministically from the persona -seed, so a given persona always presents the SAME cookies across sessions. - -In addition, always seeds 5 cookies on .google.com (NID, CONSENT, SOCS, -_GRECAPTCHA, ENID). Excludes 1P_JAR which was deprecated by Google in 2022 -— including it now is an anachronism flag. - -Public API: - await seed_recaptcha_cookies_async(context, profile, timezone=None) - seed_recaptcha_cookies_sync(context, profile, timezone=None) - -`profile` is an `_fpforge.Profile`; `timezone` is the IANA tz (e.g. 
-"Europe/Rome") used to derive the CONSENT cookie's language token, so a -European-tz persona gets CONSENT in their language not en+FX. -""" -from __future__ import annotations - -import datetime -import random -import time -from typing import Any, List, Optional - -# URL-safe base64 alphabet (no padding chars). -_B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" -_HEX_ALPHABET = "0123456789abcdef" - - -def _sub_seed(seed: int, tag: str) -> int: - """FNV-1a mix → independent PRNG streams per logical bucket from one seed.""" - h = 0xcbf29ce484222325 ^ (seed & 0xFFFFFFFF) - for c in tag.encode("ascii"): - h ^= c - h = (h * 0x100000001b3) & 0xFFFFFFFFFFFFFFFF - return h or 0xdeadbeef - - -def _b64_rand(rng: random.Random, length: int) -> str: - return "".join(rng.choice(_B64_ALPHABET) for _ in range(length)) - - -def _hex_rand(rng: random.Random, length: int) -> str: - return "".join(rng.choice(_HEX_ALPHABET) for _ in range(length)) - - -def _yyyymmdd_utc(ts: int) -> str: - return datetime.datetime.utcfromtimestamp(ts).strftime("%Y%m%d") - - -# IANA timezone -> (country_code, lang) for CONSENT cookie coherence. -# Real EU users get CONSENT with `++NNN`; non-EU gets `en+FX+NNN`. -# Default fallback `en+FX+NNN` for any tz not in this map. 
-_TZ_TO_REGION = { - "Europe/Rome": ("IT", "it"), - "Europe/Berlin": ("DE", "de"), - "Europe/Paris": ("FR", "fr"), - "Europe/Madrid": ("ES", "es"), - "Europe/London": ("GB", "en"), - "Europe/Amsterdam": ("NL", "nl"), - "Europe/Brussels": ("BE", "fr"), - "Europe/Vienna": ("AT", "de"), - "Europe/Zurich": ("CH", "de"), - "Europe/Dublin": ("IE", "en"), - "Europe/Lisbon": ("PT", "pt"), - "Europe/Stockholm": ("SE", "sv"), - "Europe/Oslo": ("NO", "no"), - "Europe/Copenhagen": ("DK", "da"), - "Europe/Helsinki": ("FI", "fi"), - "Europe/Warsaw": ("PL", "pl"), - "Europe/Prague": ("CZ", "cs"), - "Europe/Athens": ("GR", "el"), - "Asia/Tokyo": ("FX", "ja"), - "Asia/Shanghai": ("FX", "zh"), - "Asia/Hong_Kong": ("FX", "zh"), - "Asia/Seoul": ("FX", "ko"), -} - - -def _consent_region_lang(timezone: Optional[str]) -> tuple: - """Map IANA tz → (region_token, lang_2char) for CONSENT cookie. - Default `("FX", "en")` for US/unknown.""" - if timezone and timezone in _TZ_TO_REGION: - return _TZ_TO_REGION[timezone] - return ("FX", "en") - - -# --------------------------------------------------------------------------- -# .google.com cookie batch (always present, regardless of browsing history) -# --------------------------------------------------------------------------- - -def _google_cookies(rng: random.Random, now: int, - timezone: Optional[str] = None) -> List[dict]: - consent_age = rng.randint(60, 720) * 86400 - region, lang = _consent_region_lang(timezone) - # NID 3-digit prefix range broadened to 100-540 to cover historical NID - # versions (137, 105, 511, 525 etc. observed in real captures). - return [ - {"name": "NID", - "value": f"{rng.randint(100, 540)}={_b64_rand(rng, 178)}", - "domain": ".google.com", "path": "/", - "expires": now + 180 * 86400, - "httpOnly": True, "secure": True, "sameSite": "None"}, - {"name": "CONSENT", - "value": f"YES+cb.{_yyyymmdd_utc(now - consent_age)}-" - f"{rng.randint(10, 19):02d}-p{rng.randint(0, 9)}." 
- f"{lang}+{region}+{rng.randint(100, 999)}", - "domain": ".google.com", "path": "/", - "expires": now + 395 * 86400, - "secure": True, "sameSite": "Lax"}, - # 1P_JAR removed: Google deprecated it in 2022. Including it now is - # an anachronism flag for fingerprinters that look at cookie freshness. - {"name": "SOCS", - "value": f"CAES{_b64_rand(rng, 56)}", - "domain": ".google.com", "path": "/", - "expires": now + 395 * 86400, - "secure": True, "sameSite": "Lax"}, - {"name": "_GRECAPTCHA", - "value": _b64_rand(rng, 124), - "domain": ".google.com", "path": "/", - "expires": now + 180 * 86400, - "secure": True, "sameSite": "None"}, - {"name": "ENID", - "value": _b64_rand(rng, 252), - "domain": ".google.com", "path": "/", - "expires": now + 395 * 86400, - "httpOnly": True, "secure": True, "sameSite": "Lax"}, - ] - - -# --------------------------------------------------------------------------- -# Per-site cookie generators (recipes keyed by site["cookie_profile"]) -# --------------------------------------------------------------------------- - -def _norm_domain(domain: str) -> str: - return domain if domain.startswith(".") else "." 
+ domain - - -def _ga_cookie(rng: random.Random, now: int, domain: str) -> dict: - first_age = rng.randint(7, 395) * 86400 - return {"name": "_ga", - "value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - first_age}", - "domain": domain, "path": "/", - "expires": now + 395 * 86400, - "secure": True, "sameSite": "Lax"} - - -def _gid_cookie(rng: random.Random, now: int, domain: str) -> dict: - return {"name": "_gid", - "value": f"GA1.2.{rng.randint(100000000, 999999999)}.{now - rng.randint(60, 86400)}", - "domain": domain, "path": "/", - "expires": now + 86400, - "secure": True, "sameSite": "Lax"} - - -def _cf_bm_cookie(rng: random.Random, now: int, domain: str) -> dict: - return {"name": "__cf_bm", - "value": f"{_b64_rand(rng, 43)}.{rng.randint(1700000000, now)}-1-1-1-1", - "domain": domain, "path": "/", - "expires": now + 1800, - "secure": True, "sameSite": "None"} - - -def _onetrust_cookie(rng: random.Random, now: int, domain: str) -> dict: - age_d = rng.randint(7, 365) - iso = datetime.datetime.utcfromtimestamp(now - age_d * 86400).strftime( - "%Y-%m-%dT%H:%M:%S.000Z" - ) - return {"name": "OptanonAlertBoxClosed", - "value": iso, - "domain": domain, "path": "/", - "expires": now + 395 * 86400, - "secure": True, "sameSite": "Lax"} - - -def _cookieyes_cookie(rng: random.Random, now: int, domain: str) -> dict: - return {"name": "cookieyes-consent", - "value": "consentid:" + _b64_rand(rng, 28) + - ",consent:yes,action:yes,necessary:yes,functional:yes,analytics:yes", - "domain": domain, "path": "/", - "expires": now + 395 * 86400, - "secure": True, "sameSite": "Lax"} - - -def _clarity_cookie(rng: random.Random, now: int, domain: str) -> dict: - return {"name": "_clck", - "value": f"{_hex_rand(rng, 8)}|2|f{rng.randint(10, 99)}|0|" - f"{now - rng.randint(60, 180) * 86400}", - "domain": domain, "path": "/", - "expires": now + 365 * 86400, - "secure": True, "sameSite": "Lax"} - - -def _fbp_cookie(rng: random.Random, now: int, domain: str) -> dict: - """Facebook Pixel 
_fbp = fb...""" - return {"name": "_fbp", - "value": f"fb.1.{(now - rng.randint(60, 30*86400)) * 1000}." - f"{rng.randint(100000000, 9999999999)}", - "domain": domain, "path": "/", - "expires": now + 90 * 86400, - "secure": True, "sameSite": "Lax"} - - -def _gtm_cookie(rng: random.Random, now: int, domain: str) -> dict: - """_dc_gtm_=1 — Google Tag Manager throttle flag.""" - container = f"UA-{rng.randint(10000000, 99999999)}-{rng.randint(1, 9)}" - return {"name": f"_dc_gtm_{container}", - "value": "1", - "domain": domain, "path": "/", - "expires": now + 60, - "secure": True, "sameSite": "Lax"} - - -def _hssrc_cookie(rng: random.Random, now: int, domain: str) -> dict: - """HubSpot referrer flag — small int.""" - return {"name": "__hssrc", - "value": str(rng.randint(1, 5)), - "domain": domain, "path": "/", - "expires": now + 1800, - "secure": True, "sameSite": "Lax"} - - -def _cookies_for_profile(profile: str, rng: random.Random, - now: int, domain: str) -> List[dict]: - """Map cookie_profile tag (from browsing_pool.json) → concrete cookies. - - Each recipe is a realistic combination observed on real production sites - in that category. Cookie age and sub-recipe variance (e.g., OneTrust vs - CookieYes for consent banner) are deterministic from rng. 
- """ - domain = _norm_domain(domain) - if profile == "minimal": - return [_ga_cookie(rng, now, domain)] - if profile == "ga_only": - out = [_ga_cookie(rng, now, domain), _gid_cookie(rng, now, domain)] - # 30% chance of GTM helper paired with GA - if rng.random() < 0.3: - out.append(_gtm_cookie(rng, now, domain)) - return out - if profile == "ga_cf": - return [_ga_cookie(rng, now, domain), _cf_bm_cookie(rng, now, domain)] - if profile == "ga_consent": - out = [_ga_cookie(rng, now, domain), _gid_cookie(rng, now, domain)] - out.append(_onetrust_cookie(rng, now, domain) if rng.random() < 0.5 - else _cookieyes_cookie(rng, now, domain)) - if rng.random() < 0.4: - out.append(_gtm_cookie(rng, now, domain)) - return out - if profile == "ga_consent_clarity": - # Heavy-tracking site profile: GA + Clarity + consent + often FB pixel - out = [_ga_cookie(rng, now, domain), _gid_cookie(rng, now, domain), - _clarity_cookie(rng, now, domain)] - out.append(_onetrust_cookie(rng, now, domain) if rng.random() < 0.5 - else _cookieyes_cookie(rng, now, domain)) - if rng.random() < 0.5: - out.append(_fbp_cookie(rng, now, domain)) - if rng.random() < 0.4: - out.append(_gtm_cookie(rng, now, domain)) - if rng.random() < 0.25: - out.append(_hssrc_cookie(rng, now, domain)) - return out - # Unknown profile → safe fallback - return [_ga_cookie(rng, now, domain)] - - -# --------------------------------------------------------------------------- -# Public builder -# --------------------------------------------------------------------------- - -def build_cookies(seed: int, - browsing_history: Optional[List[dict]] = None, - now: Optional[int] = None, - timezone: Optional[str] = None) -> List[dict]: - """Build the full cookie list for a persona. - - Args: - seed: persona integer seed (from `Profile.seed`) - browsing_history: list of {name, category, cookie_profile} dicts as - sampled by `_fpforge.derive_browsing_history`. None → empty list - (only the 5 google cookies are returned). 
- now: unix-seconds timestamp; defaults to current time. Pin for tests. - timezone: IANA tz used to derive CONSENT cookie's `lang+region` token - (e.g. "Europe/Rome" → "it+IT", "America/New_York" → "en+FX"). - """ - ts = now if now is not None else int(time.time()) - cookies: List[dict] = [] - - # 5 .google.com cookies (always) — CONSENT lang derived from tz - rng_g = random.Random(_sub_seed(int(seed), "google")) - cookies.extend(_google_cookies(rng_g, ts, timezone=timezone)) - - # Per-site cookies (deterministic from seed × domain) - for site in (browsing_history or []): - rng_d = random.Random(_sub_seed(int(seed), f"dom:{site['name']}")) - cookies.extend(_cookies_for_profile( - site.get("cookie_profile", "minimal"), rng_d, ts, site["name"] - )) - return cookies - - -def _extract_seed_and_history(profile: Any) -> tuple: - """Accept a Profile object OR a (seed, history) tuple OR just an int seed.""" - if isinstance(profile, int): - return int(profile), [] - seed = int(getattr(profile, "seed")) - history = list(getattr(profile, "browsing_history", []) or []) - return seed, history - - -async def seed_recaptcha_cookies_async(context: Any, profile: Any, - timezone: Optional[str] = None) -> None: - """Async: inject deterministic persona cookies into the context.""" - seed, history = _extract_seed_and_history(profile) - cookies = build_cookies(seed, history, timezone=timezone) - try: - await context.add_cookies(cookies) - except Exception: - pass - - -def seed_recaptcha_cookies_sync(context: Any, profile: Any, - timezone: Optional[str] = None) -> None: - """Sync: inject deterministic persona cookies into the context.""" - seed, history = _extract_seed_and_history(profile) - cookies = build_cookies(seed, history, timezone=timezone) - try: - context.add_cookies(cookies) - except Exception: - pass - - -__all__ = [ - "build_cookies", - "seed_recaptcha_cookies_async", - "seed_recaptcha_cookies_sync", -] diff --git a/src/invisible_playwright/async_api.py 
b/src/invisible_playwright/async_api.py index 70a7aeb..2b2eeca 100644 --- a/src/invisible_playwright/async_api.py +++ b/src/invisible_playwright/async_api.py @@ -9,7 +9,6 @@ from typing import Any, Dict, Optional, Union from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright from ._fpforge import Profile, generate_profile -from ._geo import resolve_session_timezone from ._headless import make_virtual_display from ._proxy import configure_proxy as _configure_proxy_shared from .download import ensure_binary @@ -52,7 +51,6 @@ class InvisiblePlaywright: extra_prefs: Optional[Dict[str, Any]] = None, binary_path: Optional[str] = None, profile_dir: Optional[Union[str, Path]] = None, - prep_recaptcha: bool = False, ) -> None: # See sync launcher: `zoom.stealth.fpp.hw_seed` is int32_t — clamp. self.seed: int = int(seed) if seed is not None else secrets.randbits(31) @@ -66,8 +64,6 @@ class InvisiblePlaywright: self._extra_prefs = extra_prefs self._binary_path = binary_path self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None - # reCAPTCHA pre-seed gated server-side; respect persistent profile. - self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None self._profile: Profile = generate_profile(self.seed, pin=self._pin) self._pw: Optional[Playwright] = None self._browser: Optional[Browser] = None @@ -76,13 +72,6 @@ class InvisiblePlaywright: async def __aenter__(self) -> Union[Browser, BrowserContext]: import sys as _sys - # Resolve timezone="auto" (and the proxy-set-but-unset default) to a - # concrete IANA zone before anything reads self._timezone. Run the - # blocking geo lookup off the event loop. Fail-early if a proxy is set - # but the egress zone can't be resolved. 
- self._timezone = await asyncio.to_thread( - resolve_session_timezone, self._timezone, self._proxy - ) executable = self._binary_path or ensure_binary() prefs = translate_profile_to_prefs( self._profile, @@ -135,18 +124,12 @@ class InvisiblePlaywright: def _patch_new_context_defaults(self, browser: Browser) -> None: original = browser.new_context defaults = self._default_context_kwargs() - prep = self._prep_recaptcha - profile = self._profile # pass the whole Profile (seed + browsing_history) - tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region async def patched(**kw): merged = dict(defaults) merged.update(kw) ctx = await original(**merged) _patch_new_page_sleep(ctx) - if prep: - from ._recaptcha_seed import seed_recaptcha_cookies_async - await seed_recaptcha_cookies_async(ctx, profile, timezone=tz) return ctx browser.new_context = patched # type: ignore[assignment] diff --git a/src/invisible_playwright/cli.py b/src/invisible_playwright/cli.py index e6057cf..bb1c687 100644 --- a/src/invisible_playwright/cli.py +++ b/src/invisible_playwright/cli.py @@ -44,13 +44,7 @@ def _cmd_clear_cache(_args: argparse.Namespace) -> int: def build_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser(prog="invisible-playwright", description="invisible_playwright CLI") - # Top-level `--version` / `-V` flag so `python -m invisible_playwright --version` - # works (Python convention), in addition to the existing `version` subcommand. 
- p.add_argument( - "-V", "--version", action="version", - version=f"invisible_playwright {__version__} (BINARY_VERSION={BINARY_VERSION}, Firefox {FIREFOX_UPSTREAM_VERSION})", - ) - sub = p.add_subparsers(dest="cmd") + sub = p.add_subparsers(dest="cmd", required=True) sub.add_parser("fetch", help="download the patched Firefox binary") sub.add_parser("path", help="print the absolute path to the cached binary") @@ -60,15 +54,7 @@ def build_parser() -> argparse.ArgumentParser: def main(argv: list[str] | None = None) -> int: - parser = build_parser() - args = parser.parse_args(argv) - if args.cmd is None: - # argparse-conventional: print usage + error message to stderr, exit 2. - # We can't keep `required=True` on the subparsers because that breaks - # the top-level `--version` flag (argparse demands a subcommand even - # when --version is the only token). parser.error() preserves the - # original "no subcommand" exit semantics tests expect. - parser.error("a subcommand is required (try --help, --version, or one of: fetch, path, version, clear-cache)") + args = build_parser().parse_args(argv) dispatch = { "fetch": _cmd_fetch, "path": _cmd_path, diff --git a/src/invisible_playwright/config.py b/src/invisible_playwright/config.py deleted file mode 100644 index c411512..0000000 --- a/src/invisible_playwright/config.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Public helpers for building Firefox launch config without using ``InvisiblePlaywright``. - -Use these when you need to call ``playwright.firefox.launch()`` (or -``firefox.launch_persistent_context()``) directly with our patched binary -and stealth prefs, instead of using the ``InvisiblePlaywright`` context -manager. 
- -Typical caller is an external integration that owns its own browser -lifecycle (a Crawlee/Skyvern/changedetection-style fetcher, a Playwright -Server wrapper, a multi-language harness) and just wants the building -blocks:: - - from playwright.async_api import async_playwright - from invisible_playwright import ensure_binary, get_default_stealth_prefs - - async with async_playwright() as p: - browser = await p.firefox.launch( - executable_path=str(ensure_binary()), - firefox_user_prefs=get_default_stealth_prefs(seed=42), - ) - -For everyday Python usage the ``InvisiblePlaywright`` context manager is -still the recommended entry point; these helpers expose the same internals -without the lifecycle ownership. - -.. note:: - When calling ``firefox.launch()`` yourself, pass ``headless=False`` and - manage the display hiding (Xvfb on Linux, hidden desktop on Windows) - externally. Passing ``headless=True`` directly to Playwright puts - Firefox in true headless mode, which skips the real rendering pipeline - and breaks canvas / audio / WebGL fingerprint coherence. The - ``InvisiblePlaywright`` context manager does this translation - automatically; the public helpers leave it to the caller. -""" -from __future__ import annotations - -import secrets -from typing import Any, Dict, List, Optional, Union - -from ._fpforge import generate_profile -from .prefs import translate_profile_to_prefs - - -def get_default_stealth_prefs( - seed: Optional[int] = None, - *, - pin: Optional[Dict[str, Any]] = None, - locale: str = "en-US", - timezone: str = "", - extra_prefs: Optional[Dict[str, Any]] = None, - humanize: Union[bool, float] = True, - virtual_display: bool = False, -) -> Dict[str, Any]: - """Build a complete ``firefox_user_prefs`` dict for ``firefox.launch()``. - - Same prefs that ``InvisiblePlaywright(seed=..., locale=..., timezone=..., - extra_prefs=..., humanize=...)`` would inject. Use this when you need to - drive ``playwright.firefox.launch()`` yourself. 
- - Args: - seed: Integer seed for the Bayesian fingerprint sampler. Same seed - produces the same fingerprint. ``None`` generates a fresh - random int31 (matches ``InvisiblePlaywright`` default). - pin: Optional dict forcing specific fingerprint fields while the - rest stays seed-derived. See ``docs/pinning.md``. - locale: BCP-47 tag (e.g. ``"en-US"``). Drives ``Accept-Language`` - and ``navigator.language``. - timezone: IANA timezone (e.g. ``"America/New_York"``). Empty means - use the host TZ. This pure pref builder does NOT resolve - ``"auto"`` (that needs the proxy + a network lookup at launch - time) — pass a concrete zone here, or use ``InvisiblePlaywright`` - / ``resolve_session_timezone(timezone, proxy)`` for ``"auto"``. - extra_prefs: Optional dict overlaid LAST onto the generated prefs. - humanize: When True (default), every mouse move is expanded into - a Bezier trajectory by the patched Juggler. A float caps the - motion in seconds. False disables the behavior. - virtual_display: When True on Windows, apply GPU-disabling prefs - to prevent GPU process crashes on virtual desktops without - D3D11 backend. - - Returns: - Dict ready to pass as ``firefox_user_prefs=`` to - ``playwright.firefox.launch()`` or ``launch_persistent_context()``. - """ - resolved_seed = int(seed) if seed is not None else secrets.randbits(31) - profile = generate_profile(resolved_seed, pin=pin) - prefs = translate_profile_to_prefs( - profile, - locale=locale, - timezone=timezone, - extra_prefs=extra_prefs, - virtual_display=virtual_display, - ) - prefs["invisible_playwright.humanize"] = bool(humanize) - if humanize: - max_seconds = float(humanize) if not isinstance(humanize, bool) else 1.5 - prefs["invisible_playwright.humanize.maxTime"] = str(max_seconds) - return prefs - - -def get_default_args() -> List[str]: - """Return the default Firefox CLI args to pass via ``args=``. 
- - Currently empty list, since all our stealth configuration is delivered - via ``firefox_user_prefs`` rather than CLI flags. Exposed for parity - with the ``cloakbrowser.config.get_default_stealth_args`` pattern and - to future-proof integrations that already wire ``args=[*existing, - *get_default_args()]``. - """ - return [] diff --git a/src/invisible_playwright/constants.py b/src/invisible_playwright/constants.py index 295ebf5..b13a458 100644 --- a/src/invisible_playwright/constants.py +++ b/src/invisible_playwright/constants.py @@ -7,7 +7,7 @@ bugfixes don't force a multi-hour Firefox rebuild. from __future__ import annotations # Bump this when a new patched Firefox build is released on GitHub. -BINARY_VERSION: str = "firefox-8" +BINARY_VERSION: str = "firefox-5" # Underlying Firefox version (for display only; does not drive downloads). FIREFOX_UPSTREAM_VERSION: str = "150.0.1" @@ -46,21 +46,3 @@ BINARY_ENTRY_REL = { RELEASE_URL_TEMPLATE = ( "https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}" ) - -# ───────────────────────────────────────────────────────────────────────── -# GeoIP database (timezone="auto" → resolve IANA zone from proxy egress IP) -# ───────────────────────────────────────────────────────────────────────── -# daijro/geoip-all-in-one merges IP2Location LITE + GeoLite2 + DB-IP into a -# single mmdb (country ISO + coordinates + IANA timezone via tzfpy), rebuilt -# weekly. GPL-3.0, so we DOWNLOAD it at runtime into the user cache (like the -# Firefox binary) rather than bundling it into this MIT package. The `-all` -# variant covers IPv4+IPv6. download.py tracks the LATEST release and refreshes -# weekly; GEOIP_MMDB_VERSION is only the cold-cache fallback when the GitHub -# API is unreachable on a machine that has never downloaded the DB. 
-GEOIP_REPO: str = "daijro/geoip-all-in-one" -GEOIP_MMDB_VERSION: str = "2026.06.03" -GEOIP_ASSET: str = "geoip-aio-all.mmdb.zip" -GEOIP_MMDB_NAME: str = "geoip-aio-all.mmdb" -GEOIP_RELEASE_URL_TEMPLATE: str = ( - "https://github.com/daijro/geoip-all-in-one/releases/download/{tag}/{asset}" -) diff --git a/src/invisible_playwright/download.py b/src/invisible_playwright/download.py index 7417e39..58a5e8f 100644 --- a/src/invisible_playwright/download.py +++ b/src/invisible_playwright/download.py @@ -5,11 +5,9 @@ import hashlib import os import platform import re -import shutil import sys import tarfile import tempfile -import time import zipfile from pathlib import Path @@ -20,10 +18,6 @@ from .constants import ( ARCHIVE_NAME, BINARY_ENTRY_REL, BINARY_VERSION, - GEOIP_ASSET, - GEOIP_MMDB_NAME, - GEOIP_MMDB_VERSION, - GEOIP_RELEASE_URL_TEMPLATE, RELEASE_URL_TEMPLATE, ) @@ -157,136 +151,3 @@ def ensure_binary(version: str = BINARY_VERSION) -> Path: if not entry.exists(): raise RuntimeError(f"binary not found after extraction: {entry}") return entry - - -# ───────────────────────────────────────────────────────────────────────── -# GeoIP mmdb (timezone="auto" → map egress IP → IANA zone) -# -# daijro/geoip-all-in-one is rebuilt WEEKLY, so we don't pin a tag. We cache -# the latest mmdb and, once it's older than GEOIP_REFRESH_DAYS, re-check the -# latest release and pull a newer build if one exists. Net effect: no download -# (not even an API call) on a launch within the window; auto-refresh after it; -# a stale cache is reused when offline rather than breaking the launch. -# ───────────────────────────────────────────────────────────────────────── -GEOIP_REFRESH_DAYS = 7 # matches daijro's weekly rebuild cadence - - -def _geoip_root() -> Path: - return cache_root() / "geoip" - - -def _geoip_check_marker() -> Path: - return _geoip_root() / ".last_check" - - -def _cached_geoip_mmdb() -> Path | None: - """Newest cached mmdb across tag dirs, or None. 
Tag dirs are date strings - (e.g. ``2026.06.03``) so a lexical sort is chronological.""" - root = _geoip_root() - if not root.exists(): - return None - cands = sorted(root.glob("*/*.mmdb")) - return cands[-1] if cands else None - - -def _geoip_cache_fresh(max_age_days: int) -> bool: - marker = _geoip_check_marker() - if not marker.exists(): - return False - return (time.time() - marker.stat().st_mtime) < max_age_days * 86400 - - -def _touch_geoip_marker() -> None: - m = _geoip_check_marker() - m.parent.mkdir(parents=True, exist_ok=True) - m.touch() - - -def _latest_geoip_tag() -> str: - """Latest ``daijro/geoip-all-in-one`` release tag via the GitHub API.""" - headers = {"Accept": "application/vnd.github+json"} - token = _github_token() - if token: - headers["Authorization"] = f"token {token}" - r = requests.get( - f"https://api.github.com/repos/{GEOIP_REPO}/releases/latest", - headers=headers, timeout=15, - ) - r.raise_for_status() - tag = r.json().get("tag_name") - if not tag: - raise RuntimeError("no tag_name in geoip-all-in-one latest release") - return tag - - -def _download_geoip_tag(tag: str) -> Path: - """Download + extract a specific tag's mmdb if not already cached.""" - dst_dir = _geoip_root() / tag - target = dst_dir / GEOIP_MMDB_NAME - if not target.exists(): - url = GEOIP_RELEASE_URL_TEMPLATE.format(tag=tag, asset=GEOIP_ASSET) - dst_dir.mkdir(parents=True, exist_ok=True) - with tempfile.TemporaryDirectory() as td: - archive = Path(td) / GEOIP_ASSET - _download_file(url, archive) - _extract(archive, dst_dir) - if target.exists(): - return target - # asset name inside the zip may differ from GEOIP_MMDB_NAME - found = sorted(dst_dir.glob("*.mmdb")) - if found: - return found[0] - raise RuntimeError(f"geoip mmdb not found after extraction in {dst_dir}") - - -def _prune_old_geoip_tags(keep: str) -> None: - """Drop every cached tag dir except ``keep`` to bound disk usage.""" - root = _geoip_root() - if not root.exists(): - return - for d in root.iterdir(): 
- if d.is_dir() and d.name != keep: - shutil.rmtree(d, ignore_errors=True) - - -def geoip_mmdb_path() -> Path | None: - """Path to the currently-cached mmdb (newest tag), or None if none cached.""" - return _cached_geoip_mmdb() - - -def ensure_geoip_mmdb(max_age_days: int = GEOIP_REFRESH_DAYS) -> Path: - """Return a geoip mmdb, kept fresh against daijro's weekly rebuild. - - Resolution order: - 1. ``STEALTHFOX_GEOIP_MMDB`` env → use that file (user-supplied / test). - 2. A cached mmdb younger than ``max_age_days`` → use it (no network). - 3. Else ask GitHub for the latest tag, download it if not already cached, - prune older tags, and reset the freshness timer. - 4. If the API/download is unreachable but a cached mmdb exists → use it - (and reset the timer so we don't hammer the API while offline). - 5. Cold cache + no network → fall back to the pinned ``GEOIP_MMDB_VERSION``; - if that download also fails, raise. - """ - override = os.environ.get("STEALTHFOX_GEOIP_MMDB") - if override: - p = Path(override) - if not p.exists(): - raise RuntimeError(f"STEALTHFOX_GEOIP_MMDB points to a missing file: {p}") - return p - - cached = _cached_geoip_mmdb() - if cached and _geoip_cache_fresh(max_age_days): - return cached - - try: - tag = _latest_geoip_tag() - except Exception: - if cached: - _touch_geoip_marker() # recheck after the window; don't hammer - return cached - tag = GEOIP_MMDB_VERSION # cold cache + API down → pinned fallback - - mmdb = _download_geoip_tag(tag) - _prune_old_geoip_tags(mmdb.parent.name) - _touch_geoip_marker() - return mmdb diff --git a/src/invisible_playwright/launcher.py b/src/invisible_playwright/launcher.py index 15055ee..07c7967 100644 --- a/src/invisible_playwright/launcher.py +++ b/src/invisible_playwright/launcher.py @@ -8,7 +8,6 @@ from typing import Any, Dict, Optional, Union from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright from ._fpforge import Profile, generate_profile -from ._geo import 
resolve_session_timezone from ._headless import make_virtual_display from ._proxy import configure_proxy as _configure_proxy_shared from .download import ensure_binary @@ -114,7 +113,6 @@ class InvisiblePlaywright: extra_prefs: Optional[Dict[str, Any]] = None, binary_path: Optional[str] = None, profile_dir: Optional[Union[str, Path]] = None, - prep_recaptcha: bool = False, ) -> None: """ Args: @@ -136,14 +134,8 @@ class InvisiblePlaywright: a float caps the motion in seconds. locale: BCP-47 tag (e.g. ``"en-US"``). Drives the ``Accept-Language`` header and ``navigator.language``. - timezone: IANA zone (e.g. ``"America/New_York"``) — used as-is - when set, the only way to force a specific zone. ``""`` - (default) or ``"auto"`` ALWAYS resolves from the egress IP: - through the proxy when one is set, otherwise from the host's - own public IP (one lookup + an offline mmdb). On failure: with - a proxy it raises (a foreign proxy on the host TZ is the - ``timezone_mismatch`` signal); without a proxy it falls back to - the host TZ so a transient lookup failure can't break launch. + timezone: IANA timezone (e.g. ``"America/New_York"``). Empty + means use the host TZ. extra_prefs: Optional dict of Firefox prefs overlayed on top of the generated profile — useful for niche tweaks without monkey-patching the package. @@ -174,10 +166,6 @@ class InvisiblePlaywright: self._extra_prefs = extra_prefs self._binary_path = binary_path self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None - # reCAPTCHA cookie pre-seed — opt-in. Gated server-side: if a - # persistent profile_dir is in use, respect its existing cookies - # and DON'T enable pre-seed (the profile owns its own state). 
- self._prep_recaptcha = bool(prep_recaptcha) and self._profile_dir is None self._profile: Profile = generate_profile(self.seed, pin=self._pin) self._pw: Optional[Playwright] = None self._browser: Optional[Browser] = None @@ -185,10 +173,6 @@ class InvisiblePlaywright: self._virtual_display: Any = None def __enter__(self) -> Union[Browser, BrowserContext]: - # Resolve timezone="auto" (and the proxy-set-but-unset default) to a - # concrete IANA zone before anything reads self._timezone. Fail-early - # if a proxy is set but the egress zone can't be resolved. - self._timezone = resolve_session_timezone(self._timezone, self._proxy) executable = self._binary_path or ensure_binary() prefs = self._build_prefs() playwright_proxy = _configure_proxy_shared(self._proxy, prefs) @@ -256,18 +240,12 @@ class InvisiblePlaywright: """ original = browser.new_context defaults = self._default_context_kwargs() - prep = self._prep_recaptcha - profile = self._profile # pass the whole Profile (seed + browsing_history) - tz = self._timezone # used by _recaptcha_seed for CONSENT lang+region def patched(**kw): merged = dict(defaults) merged.update(kw) # user-supplied wins ctx = original(**merged) _patch_sync_new_page_sleep(ctx) - if prep: - from ._recaptcha_seed import seed_recaptcha_cookies_sync - seed_recaptcha_cookies_sync(ctx, profile, timezone=tz) return ctx browser.new_context = patched # type: ignore[assignment] diff --git a/src/invisible_playwright/prefs.py b/src/invisible_playwright/prefs.py index 4f0a15d..496fd04 100644 --- a/src/invisible_playwright/prefs.py +++ b/src/invisible_playwright/prefs.py @@ -289,29 +289,13 @@ _BASELINE: Dict[str, Any] = { "network.dns.echconfig.enabled": False, "network.dns.use_https_rr_as_altsvc": False, - # === Fission / site-isolation disabled (FF146 Playwright parity) === - # Force a single content-process model. 
Three knobs are required in FF150: - # upstream Playwright Firefox (FF146-based) only needed fission.autostart=False - # because FF146's default isolation strategy was looser. FF150 ships with - # fission.webContentIsolationStrategy=1 (IsolateEverything) which still - # site-isolates cross-origin iframes into separate `webIsolated` content - # processes EVEN WHEN fission.autostart is False. From the parent process's - # point of view, those iframes get a Juggler Frame placeholder with no - # docShell, no URL, and an execution context that wraps the wrong global, - # so frame.evaluate() fails with cross-origin SOP errors and - # element_handle.content_frame() returns None. - # - # Pinning the strategy to 0 keeps every cross-origin web iframe in the - # parent's content process, where the Juggler code paths from the FF146 - # era expect them. processCount.webIsolated=1 is kept as belt-and-suspenders - # in case some path still classifies an origin as webIsolated despite the - # strategy change. It costs nothing to leave. - # - # See issue #20 + tests/test_cross_origin_iframe.py for the regression - # sentinel that catches a future A/B flipping these back. + # === A/B VARIANT B: Fission disabled === + # Force single content-process model (e10s only, no BC outer/inner split). + # Diagnostic for the FF150 BC-swap theory: if peet_ws/fppro/sannysoft + # work with this off, the Juggler FF146 baseline breaks specifically on + # cross-process navigation tracking. "fission.autostart": False, "fission.autostart.session": False, - "fission.webContentIsolationStrategy": 0, # IsolateNothing "dom.ipc.processCount.webIsolated": 1, @@ -401,19 +385,19 @@ _WIN_VIRT_DESKTOP_WORKAROUNDS: Dict[str, Any] = { # restores hardware compositor + functional WebGL on alt desktops. "security.sandbox.gpu.level": 0, # Same root cause as above, content process side. Wrapper repo issue #18 - # (tab crash on cross-process navigation under headless=True). 
Sandbox - # content level > 4 puts content processes on the sandbox's own - # kAlternateWinstation (see security/sandbox/win/src/sandboxbroker/ - # sandboxBroker.cpp line 1113-1114: + # (id.sky.com tab crash). Sandbox content level > 4 puts content processes + # on the sandbox's own kAlternateWinstation (see + # security/sandbox/win/src/sandboxbroker/sandboxBroker.cpp line 1113-1114: # `if (aSandboxLevel > 4) config->SetDesktop(kAlternateWinstation)`). # Combined with our CreateDesktop alt-desktop, that puts browser process # and content processes on DIFFERENT desktops. Cross-process navigation - # then fails window parenting between parent and child, the content + # (Adobe AppMeasurement → new origin → new content process on a new + # desktop) then fails window parenting between parent and child → content # process exits cleanly (exitCode=0, signal=null) and Playwright fires # page.on('crash') ~10s after page load. Lowering content sandbox to 4 # keeps content processes on the same desktop as the browser process, - # which is what we want here (still tight enough — level 4 blocks - # file/registry write, network calls, hardware access). + # which is what we want here (and is still tight enough — level 4 + # blocks file/registry write, network calls, hardware access). "security.sandbox.content.level": 4, } diff --git a/tests/test_cross_origin_iframe.py b/tests/test_cross_origin_iframe.py deleted file mode 100644 index 8be39ac..0000000 --- a/tests/test_cross_origin_iframe.py +++ /dev/null @@ -1,295 +0,0 @@ -"""Regression tests for cross-origin / cross-process iframe interaction. - -History: wrapper repo issue #20 reported that a third-party cookie -consent iframe was completely unreachable from Playwright in 0.1.7 — -``element_handle.content_frame()`` returned ``None``, ``frame.evaluate()`` -threw cross-origin SOP errors, and ``frame_locator().click()`` timed -out. - -Root cause was a missing pref. 
FF150 ships with -``fission.webContentIsolationStrategy=1`` (IsolateEverything), which -site-isolates cross-origin iframes into separate webIsolated content -processes even when ``fission.autostart=False``. The Juggler code paths -inherited from the FF146 era assume same-process iframes. The wrapper's -``_BASELINE`` now pins the pref to 0 (IsolateNothing). - -These tests exist so a future Firefox upgrade or a fingerprint A/B -that flips this pref by accident cannot ship without a red CI signal. - -Layers: - * ``unit`` — ``_BASELINE`` contains the pref with the right value. No browser. - * ``e2e`` — launch the real binary against a LOCAL HTTP harness on - ``127.0.0.1`` (two ports = two SOP origins) and verify the - four protocol operations that regressed: frame URL tracking, - ``handle.content_frame()``, ``frame.evaluate()``, and - ``frame_locator(...).locator(...)`` element resolution. - -The e2e tests run entirely offline. They never call out to a real site; -the cross-origin shape is reproduced with two local HTTP servers on -random free ports. -""" -from __future__ import annotations - -import socket -import sys -import threading -from http.server import BaseHTTPRequestHandler, HTTPServer - -import pytest - -from invisible_playwright._fpforge import generate_profile -from invisible_playwright.prefs import _BASELINE, translate_profile_to_prefs - - -# ──────────────────────────────────────────────────────────────────── -# Unit layer — fast, no browser, runs on every CI -# ──────────────────────────────────────────────────────────────────── - - -@pytest.mark.unit -def test_baseline_pins_web_content_isolation_strategy_to_zero(): - """Regression sentinel. - - ``fission.webContentIsolationStrategy`` MUST be 0 (IsolateNothing). - The FF150 default is 1 (IsolateEverything), which site-isolates - cross-origin iframes into separate webIsolated content processes - and breaks Playwright frame tracking from the parent process. 
- """ - assert _BASELINE["fission.webContentIsolationStrategy"] == 0, ( - "fission.webContentIsolationStrategy must be 0 (IsolateNothing). " - "If you bumped it for an A/B, cross-origin iframes will appear " - "in page.frames with empty URLs and content_frame() will return " - "None — see the changelog entry that introduced this test." - ) - - -@pytest.mark.unit -def test_baseline_keeps_fission_autostart_off(): - """Belt for the suspenders above. All three prefs are required.""" - assert _BASELINE["fission.autostart"] is False - assert _BASELINE["fission.autostart.session"] is False - assert _BASELINE["dom.ipc.processCount.webIsolated"] == 1 - - -@pytest.mark.unit -def test_translated_profile_propagates_isolation_strategy(): - """The fix must survive translate_profile_to_prefs, not just live in _BASELINE.""" - p = generate_profile(seed=42) - prefs = translate_profile_to_prefs(p) - assert prefs["fission.webContentIsolationStrategy"] == 0 - - -@pytest.mark.unit -def test_extra_prefs_override_can_break_isolation_only_explicitly(): - """If a caller wants to A/B isolation, they have to set it explicitly. - The wrapper does not silently flip it back on. 
- """ - p = generate_profile(seed=42) - prefs_default = translate_profile_to_prefs(p) - assert prefs_default["fission.webContentIsolationStrategy"] == 0 - - prefs_ab = translate_profile_to_prefs( - p, extra_prefs={"fission.webContentIsolationStrategy": 1} - ) - assert prefs_ab["fission.webContentIsolationStrategy"] == 1 - - -# ──────────────────────────────────────────────────────────────────── -# E2E layer — needs cached binary + bind to localhost ports -# ──────────────────────────────────────────────────────────────────── - - -def _free_port() -> int: - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.bind(("127.0.0.1", 0)) - port = s.getsockname()[1] - s.close() - return port - - -class _SilentHandler(BaseHTTPRequestHandler): - """Suppress per-request access logging so pytest output stays clean.""" - PAYLOAD = b"" # set per-instance via subclassing - - def log_message(self, *_a): - pass - - def do_GET(self): - self.send_response(200) - self.send_header("Content-Type", "text/html; charset=utf-8") - self.send_header("Cache-Control", "no-store") - self.end_headers() - self.wfile.write(self.PAYLOAD) - - -def _serve(payload: bytes, port: int) -> HTTPServer: - """Start an HTTP server on 127.0.0.1:port serving ``payload`` on every GET.""" - handler_cls = type( - "_H", (_SilentHandler,), {"PAYLOAD": payload} - ) - srv = HTTPServer(("127.0.0.1", port), handler_cls) - t = threading.Thread(target=srv.serve_forever, daemon=True) - t.start() - return srv - - -@pytest.fixture -def cross_origin_harness(): - """Spin up TWO local HTTP servers on different localhost ports. - - Two ports = two distinct origins under SOP (same host, different port - → different origin). The parent page on port A embeds an iframe with - src pointing at port B. Same cross-origin browsing-context shape as - a parent-page-plus-third-party-iframe layout, fully offline. - """ - pa, pb = _free_port(), _free_port() - parent_html = f"""parent -

parent

- - - -""".encode("utf-8") - child_html = b""" - - - -""" - sa = _serve(parent_html, pa) - sb = _serve(child_html, pb) - try: - yield {"parent_url": f"http://127.0.0.1:{pa}/", "child_origin": f"http://127.0.0.1:{pb}"} - finally: - sa.shutdown() - sb.shutdown() - - -@pytest.fixture(scope="session") -def firefox_binary(): - """Locate the cached patched Firefox binary or skip.""" - from invisible_playwright.constants import BINARY_ENTRY_REL - if sys.platform not in BINARY_ENTRY_REL: - pytest.skip(f"unsupported platform: {sys.platform}") - from invisible_playwright.download import cache_dir_for_version - entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform] - if not entry.exists(): - pytest.skip( - "patched Firefox binary not cached; run `invisible-playwright fetch` " - "to enable E2E tests" - ) - return str(entry) - - -@pytest.mark.e2e -def test_cross_origin_iframe_url_appears_in_page_frames(firefox_binary, cross_origin_harness): - """``page.frames`` must list the cross-origin iframe with its real URL. - - Before the pref fix, the URL came back as '' because the navigation - observer for the iframe fired in a different content process than - the parent's FrameTree was registered in. 
- """ - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) - page.wait_for_selector("iframe#ifr_plain", timeout=10_000) - page.wait_for_timeout(500) - - urls = [f.url for f in page.frames] - assert any(cross_origin_harness["child_origin"] in (u or "") for u in urls), ( - f"no frame had the child origin in its URL; page.frames urls = {urls!r}" - ) - - -@pytest.mark.e2e -def test_cross_origin_iframe_content_frame_resolves(firefox_binary, cross_origin_harness): - """``handle.content_frame()`` must return a Frame (not None) for every - cross-origin iframe shape we care about: plain, sandboxed, titled. - """ - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) - page.wait_for_selector("iframe#ifr_plain", timeout=10_000) - page.wait_for_timeout(500) - - for sel in ("iframe#ifr_plain", "iframe#ifr_sandbox", "iframe#ifr_titled"): - handle = page.query_selector(sel) - assert handle is not None, f"{sel!r} not found in DOM" - cf = handle.content_frame() - assert cf is not None, f"{sel!r}: content_frame() returned None" - assert cross_origin_harness["child_origin"] in (cf.url or ""), ( - f"{sel!r}: content_frame().url = {cf.url!r}, " - f"expected child origin {cross_origin_harness['child_origin']!r}" - ) - - -@pytest.mark.e2e -def test_cross_origin_iframe_evaluate_returns_real_values(firefox_binary, cross_origin_harness): - """``frame.evaluate()`` inside the cross-origin iframe must work. 
- - Pre-fix: every evaluate failed with a cross-origin SOP error because - the iframe ended up with a stale/wrong execution context. - """ - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) - page.wait_for_selector("iframe#ifr_plain", timeout=10_000) - page.wait_for_timeout(500) - - cf = page.query_selector("iframe#ifr_plain").content_frame() - assert cf is not None - href = cf.evaluate("() => location.href") - assert cross_origin_harness["child_origin"] in href - title = cf.evaluate("() => document.title") - assert isinstance(title, str) - n_buttons = cf.evaluate("() => document.querySelectorAll('button').length") - assert n_buttons == 2 - - -@pytest.mark.e2e -def test_cross_origin_iframe_frame_locator_resolves_button(firefox_binary, cross_origin_harness): - """``frame_locator(...).locator(...)`` must reach the button inside the iframe.""" - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) - page.wait_for_selector("iframe#ifr_plain", timeout=10_000) - - for selector in ("button#ok", "button.btn-primary"): - cnt = page.frame_locator("iframe#ifr_plain").locator(selector).count() - assert cnt == 1, f"locator({selector!r}) found {cnt} elements (expected 1)" - - -@pytest.mark.e2e -def test_cross_origin_iframe_dispatch_event_click_works(firefox_binary, cross_origin_harness): - """End-to-end interaction via ``dispatch_event`` must succeed. 
- - Plain ``.click()`` can trip Playwright's actionability heuristic on - some third-party UIs (same on vanilla Playwright Firefox — not our - regression), but ``dispatch_event('click')`` always works once the - iframe is reachable. - """ - from invisible_playwright import InvisiblePlaywright - - with InvisiblePlaywright(seed=42, binary_path=firefox_binary, humanize=False) as browser: - ctx = browser.new_context() - page = ctx.new_page() - page.goto(cross_origin_harness["parent_url"], wait_until="domcontentloaded", timeout=30_000) - page.wait_for_selector("iframe#ifr_plain", timeout=10_000) - - page.frame_locator("iframe#ifr_plain").locator("button#ok").dispatch_event( - "click", timeout=4_000 - ) - cf = page.query_selector("iframe#ifr_plain").content_frame() - assert cf.evaluate("() => document.title") == "clicked" diff --git a/tests/test_fingerprint_consistency.py b/tests/test_fingerprint_consistency.py index 0a53d27..aa0f96b 100644 --- a/tests/test_fingerprint_consistency.py +++ b/tests/test_fingerprint_consistency.py @@ -306,6 +306,17 @@ def test_navigator_oscpu_matches_userAgent(page): assert "Mac" in oscpu +@pytest.mark.e2e +def test_userAgent_contains_appVersion_chromium_only(page): + """Chromium invariant: UA contains appVersion. Firefox uses a short + appVersion form so the check is gated on `'chrome' in window`.""" + if not _ev(page, "'chrome' in window"): + pytest.skip("Chromium-only invariant") + ua = _ev(page, "navigator.userAgent") + av = _ev(page, "navigator.appVersion") + assert av in ua + + # =========================================================================== # 5. 
Native function self-toString (creepjs/src/lies/index.ts hasKnownToString) # =========================================================================== diff --git a/tests/test_geo.py b/tests/test_geo.py deleted file mode 100644 index 39ef5ee..0000000 --- a/tests/test_geo.py +++ /dev/null @@ -1,288 +0,0 @@ -"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution). - -Covers: the precedence policy (resolve_session_timezone), proxy→requests -translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked -mmdb). No real network or mmdb is touched. -""" -import sys -import types - -import pytest - -from invisible_playwright import _geo -from invisible_playwright._geo import ( - GeoTimezoneError, - _proxies_for_requests, - _proxy_is_set, - discover_egress_ip, - ip_to_timezone, - resolve_session_timezone, -) - -SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"} -HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"} - - -# ────────────────────────────────────────────────────────────────────── -# _proxy_is_set -# ────────────────────────────────────────────────────────────────────── -@pytest.mark.unit -@pytest.mark.parametrize( - "proxy,expected", - [ - (None, False), - ({}, False), - ({"server": ""}, False), - ({"server": " "}, False), - ({"server": "direct://"}, False), - ({"server": "DIRECT://"}, False), - ({"server": "socks5://h:1"}, True), - ({"server": "http://h:8080"}, True), - ], -) -def test_proxy_is_set(proxy, expected): - assert _proxy_is_set(proxy) is expected - - -# ────────────────────────────────────────────────────────────────────── -# _proxies_for_requests — scheme + credential translation -# ────────────────────────────────────────────────────────────────────── -@pytest.mark.unit -def test_proxies_socks5_uses_socks5h_remote_dns(): - out = _proxies_for_requests(SOCKS) - assert out["http"] == "socks5h://u:p@gw.example:1080" - assert out["https"] == out["http"] - - 
-@pytest.mark.unit -def test_proxies_socks4_scheme(): - out = _proxies_for_requests({"server": "socks4://gw:1080"}) - assert out["http"] == "socks4://gw:1080" - - -@pytest.mark.unit -def test_proxies_http_and_https_schemes(): - assert _proxies_for_requests(HTTP)["http"] == "http://u:p@gw.example:8080" - out = _proxies_for_requests({"server": "https://gw:8443"}) - assert out["https"] == "https://gw:8443" - - -@pytest.mark.unit -def test_proxies_no_scheme_defaults_to_http(): - out = _proxies_for_requests({"server": "gw.example:3128"}) - assert out["http"] == "http://gw.example:3128" - - -@pytest.mark.unit -def test_proxies_credentials_are_url_encoded(): - out = _proxies_for_requests( - {"server": "socks5://gw:1080", "username": "user@x", "password": "p:w/d"} - ) - # '@', ':' and '/' in creds must be percent-encoded so they don't break - # the proxy URL parsing. - assert "user%40x:p%3Aw%2Fd@gw:1080" in out["http"] - - -@pytest.mark.unit -def test_proxies_no_credentials_has_no_auth_prefix(): - out = _proxies_for_requests({"server": "socks5://gw:1080"}) - assert out["http"] == "socks5h://gw:1080" - - -# ────────────────────────────────────────────────────────────────────── -# discover_egress_ip — mocked requests -# ────────────────────────────────────────────────────────────────────── -class _FakeResp: - def __init__(self, text, status=200): - self.text = text - self._status = status - - def raise_for_status(self): - if self._status >= 400: - raise RuntimeError(f"HTTP {self._status}") - - -@pytest.mark.unit -def test_discover_egress_ip_first_endpoint_wins(monkeypatch): - calls = [] - - def fake_get(url, **kw): - calls.append(url) - return _FakeResp("203.0.113.7\n") - - monkeypatch.setattr(_geo.requests, "get", fake_get) - assert discover_egress_ip(SOCKS) == "203.0.113.7" - assert len(calls) == 1 # stopped at the first success - - -@pytest.mark.unit -def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch): - seq = iter([_FakeResp("junk-not-an-ip"), 
_FakeResp("198.51.100.42")]) - - def fake_get(url, **kw): - return next(seq) - - monkeypatch.setattr(_geo.requests, "get", fake_get) - assert discover_egress_ip(HTTP) == "198.51.100.42" - - -@pytest.mark.unit -def test_discover_egress_ip_all_fail_raises(monkeypatch): - def fake_get(url, **kw): - raise OSError("connection refused") - - monkeypatch.setattr(_geo.requests, "get", fake_get) - with pytest.raises(GeoTimezoneError): - discover_egress_ip(SOCKS) - - -@pytest.mark.unit -def test_discover_egress_ip_no_proxy_is_direct(monkeypatch): - # proxy=None → direct request, requests.get must get proxies=None. - seen = {} - - def fake_get(url, **kw): - seen["proxies"] = kw.get("proxies", "MISSING") - return _FakeResp("192.0.2.55") - - monkeypatch.setattr(_geo.requests, "get", fake_get) - assert discover_egress_ip(None) == "192.0.2.55" - assert seen["proxies"] is None - - -# ────────────────────────────────────────────────────────────────────── -# ip_to_timezone — mocked mmdb reader -# ────────────────────────────────────────────────────────────────────── -class _FakeReader: - def __init__(self, record): - self._record = record - - def __enter__(self): - return self - - def __exit__(self, *a): - return False - - def get(self, ip): - return self._record - - -def _install_fake_maxminddb(monkeypatch, record): - mod = types.ModuleType("maxminddb") - mod.open_database = lambda path: _FakeReader(record) - monkeypatch.setitem(sys.modules, "maxminddb", mod) - - -@pytest.mark.unit -def test_ip_to_timezone_reads_location_time_zone(monkeypatch): - _install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Europe/Rome"}}) - assert ip_to_timezone("1.2.3.4", "x.mmdb") == "Europe/Rome" - - -@pytest.mark.unit -def test_ip_to_timezone_ip_absent_raises(monkeypatch): - _install_fake_maxminddb(monkeypatch, None) - with pytest.raises(GeoTimezoneError): - ip_to_timezone("1.2.3.4", "x.mmdb") - - -@pytest.mark.unit -def test_ip_to_timezone_missing_zone_raises(monkeypatch): - 
_install_fake_maxminddb(monkeypatch, {"location": {}}) - with pytest.raises(GeoTimezoneError): - ip_to_timezone("1.2.3.4", "x.mmdb") - - -@pytest.mark.unit -def test_ip_to_timezone_invalid_iana_raises(monkeypatch): - _install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Not/AZone"}}) - with pytest.raises(GeoTimezoneError): - ip_to_timezone("1.2.3.4", "x.mmdb") - - -# ────────────────────────────────────────────────────────────────────── -# resolve_session_timezone — the precedence policy -# ────────────────────────────────────────────────────────────────────── -@pytest.fixture -def stub_egress(monkeypatch): - """Make egress resolution deterministic + offline; record if it ran.""" - state = {"called": False} - - def fake_discover(proxy=None, **kw): - state["called"] = True - state["proxy_arg"] = proxy - return "203.0.113.7" - - monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover) - monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "America/New_York") - # ensure_geoip_mmdb is imported from .download at call time - import invisible_playwright.download as dl - - monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb") - return state - - -@pytest.mark.unit -def test_resolve_explicit_iana_wins(stub_egress): - # An explicit zone wins and never triggers resolution (proxy or not). 
- assert resolve_session_timezone("Asia/Tokyo", SOCKS) == "Asia/Tokyo" - assert resolve_session_timezone("Asia/Tokyo", None) == "Asia/Tokyo" - assert stub_egress["called"] is False - - -@pytest.mark.unit -def test_resolve_empty_with_proxy_resolves_from_proxy(stub_egress): - assert resolve_session_timezone("", SOCKS) == "America/New_York" - assert stub_egress["called"] is True - assert stub_egress["proxy_arg"] == SOCKS # routed through the proxy - - -@pytest.mark.unit -def test_resolve_auto_with_proxy_resolves_from_proxy(stub_egress): - assert resolve_session_timezone("auto", HTTP) == "America/New_York" - assert stub_egress["proxy_arg"] == HTTP - - -@pytest.mark.unit -def test_resolve_empty_no_proxy_resolves_from_host(stub_egress): - # auto ALWAYS resolves — without a proxy, from the host's own public IP. - assert resolve_session_timezone("", None) == "America/New_York" - assert stub_egress["called"] is True - assert stub_egress["proxy_arg"] is None # direct request, no proxy - - -@pytest.mark.unit -def test_resolve_auto_no_proxy_resolves_from_host(stub_egress): - assert resolve_session_timezone("auto", None) == "America/New_York" - assert stub_egress["proxy_arg"] is None - - -@pytest.mark.unit -def test_resolve_direct_proxy_resolves_via_host(stub_egress): - # direct:// counts as "no proxy" → resolve from the host IP, don't skip. - assert resolve_session_timezone("auto", {"server": "direct://"}) == "America/New_York" - assert stub_egress["proxy_arg"] is None - - -@pytest.mark.unit -def test_resolve_no_proxy_failure_falls_back_to_host(monkeypatch): - # Without a proxy, a lookup failure must NOT break the launch → host TZ (""). 
- def boom(proxy=None, **kw): - raise GeoTimezoneError("offline") - - monkeypatch.setattr(_geo, "discover_egress_ip", boom) - assert resolve_session_timezone("auto", None) == "" - assert resolve_session_timezone("", None) == "" - - -@pytest.mark.unit -def test_resolve_proxy_failure_raises(monkeypatch): - # With a proxy set, a failure must raise — never a silent host-TZ fallback. - def boom(proxy=None, **kw): - raise GeoTimezoneError("no egress") - - monkeypatch.setattr(_geo, "discover_egress_ip", boom) - with pytest.raises(GeoTimezoneError): - resolve_session_timezone("auto", SOCKS) - with pytest.raises(GeoTimezoneError): - resolve_session_timezone("", SOCKS) diff --git a/tests/test_geoip_update.py b/tests/test_geoip_update.py deleted file mode 100644 index 26632b7..0000000 --- a/tests/test_geoip_update.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Unit tests for the intelligent geoip mmdb auto-update in `download.py`. - -daijro/geoip-all-in-one rebuilds weekly; `ensure_geoip_mmdb` keeps the cache -fresh without a download (or API call) on every launch. These tests mock the -cache root, the latest-tag API, and the per-tag download so nothing touches the -network. 
-""" -import os -import time - -import pytest - -import invisible_playwright.download as dl - - -@pytest.fixture -def cache(tmp_path, monkeypatch): - """Point the cache at tmp_path and clear the env override.""" - monkeypatch.setattr(dl, "cache_root", lambda: tmp_path) - monkeypatch.delenv("STEALTHFOX_GEOIP_MMDB", raising=False) - return tmp_path - - -def _make_cached(root, tag, name=dl.GEOIP_MMDB_NAME): - d = root / "geoip" / tag - d.mkdir(parents=True, exist_ok=True) - f = d / name - f.write_bytes(b"FAKE-MMDB") - return f - - -def _set_marker_age(root, days): - m = root / "geoip" / ".last_check" - m.parent.mkdir(parents=True, exist_ok=True) - m.touch() - old = time.time() - days * 86400 - os.utime(m, (old, old)) - - -# ────────────────────────────────────────────────────────────────────── -# env override -# ────────────────────────────────────────────────────────────────────── -@pytest.mark.unit -def test_env_override_returns_file(tmp_path, monkeypatch): - f = tmp_path / "mine.mmdb" - f.write_bytes(b"X") - monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(f)) - assert dl.ensure_geoip_mmdb() == f - - -@pytest.mark.unit -def test_env_override_missing_raises(tmp_path, monkeypatch): - monkeypatch.setenv("STEALTHFOX_GEOIP_MMDB", str(tmp_path / "nope.mmdb")) - with pytest.raises(RuntimeError): - dl.ensure_geoip_mmdb() - - -# ────────────────────────────────────────────────────────────────────── -# freshness window -# ────────────────────────────────────────────────────────────────────── -@pytest.mark.unit -def test_fresh_cache_no_network(cache, monkeypatch): - f = _make_cached(cache, "2026.06.03") - _set_marker_age(cache, 0) # just checked - - def boom(): - raise AssertionError("latest-tag API must NOT be called within the window") - - monkeypatch.setattr(dl, "_latest_geoip_tag", boom) - assert dl.ensure_geoip_mmdb(max_age_days=7) == f - - -@pytest.mark.unit -def test_stale_same_tag_no_download(cache, monkeypatch): - f = _make_cached(cache, "2026.06.03") - 
_set_marker_age(cache, 30) # stale → will re-check - monkeypatch.setattr(dl, "_latest_geoip_tag", lambda: "2026.06.03") - # real _download_geoip_tag runs but target exists, so no actual download: - monkeypatch.setattr(dl, "_download_file", lambda *a, **k: (_ for _ in ()).throw( - AssertionError("must not download when tag already cached"))) - assert dl.ensure_geoip_mmdb(max_age_days=7) == f - - -@pytest.mark.unit -def test_stale_new_tag_downloads_and_prunes(cache, monkeypatch): - old = _make_cached(cache, "2026.06.03") - _set_marker_age(cache, 30) - monkeypatch.setattr(dl, "_latest_geoip_tag", lambda: "2026.06.10") - - def fake_download(tag): - return _make_cached(cache, tag) # simulate fetch+extract of the new tag - - monkeypatch.setattr(dl, "_download_geoip_tag", fake_download) - got = dl.ensure_geoip_mmdb(max_age_days=7) - assert got.parent.name == "2026.06.10" - assert not old.parent.exists() # old tag pruned - assert got.exists() - - -# ────────────────────────────────────────────────────────────────────── -# offline resilience -# ────────────────────────────────────────────────────────────────────── -@pytest.mark.unit -def test_api_down_with_cache_uses_cache(cache, monkeypatch): - f = _make_cached(cache, "2026.06.03") - _set_marker_age(cache, 30) - - def boom(): - raise OSError("offline") - - monkeypatch.setattr(dl, "_latest_geoip_tag", boom) - assert dl.ensure_geoip_mmdb(max_age_days=7) == f # stale cache reused, no raise - - -@pytest.mark.unit -def test_cold_cache_api_down_falls_back_to_pinned(cache, monkeypatch): - # no cache at all + API unreachable → pinned GEOIP_MMDB_VERSION fallback. 
- def boom(): - raise OSError("offline") - - monkeypatch.setattr(dl, "_latest_geoip_tag", boom) - captured = {} - - def fake_download(tag): - captured["tag"] = tag - return _make_cached(cache, tag) - - monkeypatch.setattr(dl, "_download_geoip_tag", fake_download) - got = dl.ensure_geoip_mmdb(max_age_days=7) - assert captured["tag"] == dl.GEOIP_MMDB_VERSION - assert got.exists() diff --git a/tests/test_recaptcha_seed.py b/tests/test_recaptcha_seed.py deleted file mode 100644 index dbd1821..0000000 --- a/tests/test_recaptcha_seed.py +++ /dev/null @@ -1,349 +0,0 @@ -"""Unit tests for the deterministic reCAPTCHA cookie builder. - -Validates the contract: - - 6 .google.com cookies always present - - Per-site cookies built from a `browsing_history` list (sampled by the - Bayesian network in _fpforge) - - Determinism: same (seed, history) → identical content - - Chrome 400-day cookie cap respected - - Playwright add_cookies field requirements satisfied -""" -import pytest - -from invisible_playwright._recaptcha_seed import ( - build_cookies, - _sub_seed, -) - - -pytestmark = pytest.mark.unit - - -_FIXED_NOW = 1779600000 # 2026-05-23, frozen for determinism - - -# Sample browsing history for tests (mimics what _fpforge produces). -_SAMPLE_HISTORY = [ - {"name": "github.com", "category": "dev", "cookie_profile": "ga_cf"}, - {"name": "stackoverflow.com", "category": "dev", "cookie_profile": "ga_consent_clarity"}, - {"name": "amazon.com", "category": "shop", "cookie_profile": "ga_consent_clarity"}, - {"name": "wikipedia.org", "category": "reference", "cookie_profile": "minimal"}, - {"name": "youtube.com", "category": "media", "cookie_profile": "ga_only"}, -] - - -# =========================================================================== -# 1. 
Set composition -# =========================================================================== - -def test_only_google_cookies_when_no_history(): - """Empty/None history → only the 5 .google.com cookies (1P_JAR removed - in realism round 2 — deprecated by Google 2022).""" - cookies = build_cookies(seed=42, browsing_history=None, now=_FIXED_NOW) - names = sorted(c["name"] for c in cookies) - assert names == sorted(["NID", "CONSENT", "SOCS", - "_GRECAPTCHA", "ENID"]) - assert all(c["domain"] == ".google.com" for c in cookies) - - -def test_browsing_history_adds_host_cookies(): - """Each history site contributes 1+ cookies on its domain.""" - cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - google = [c for c in cookies if c["domain"] == ".google.com"] - assert len(google) == 5 # 1P_JAR removed - - domains = {c["domain"] for c in cookies if c["domain"] != ".google.com"} - for site in _SAMPLE_HISTORY: - assert f".{site['name']}" in domains - - -def test_domain_dot_prefix_normalized(): - """All host cookie domains have a leading dot for sub-domain coverage.""" - cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - for c in cookies: - assert c["domain"].startswith("."), f"missing dot: {c['domain']}" - - -# =========================================================================== -# 2. 
Cookie profile recipes (each profile yields the expected cookie set) -# =========================================================================== - -def test_profile_minimal_yields_ga_only(): - history = [{"name": "x.com", "cookie_profile": "minimal"}] - cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) - host = [c for c in cookies if c["domain"] == ".x.com"] - names = [c["name"] for c in host] - assert names == ["_ga"] - - -def test_profile_ga_only_yields_ga_and_gid(): - history = [{"name": "x.com", "cookie_profile": "ga_only"}] - cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) - host = [c for c in cookies if c["domain"] == ".x.com"] - names = sorted(c["name"] for c in host) - assert names == ["_ga", "_gid"] - - -def test_profile_ga_cf_yields_ga_and_cf_bm(): - history = [{"name": "x.com", "cookie_profile": "ga_cf"}] - cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) - host = [c for c in cookies if c["domain"] == ".x.com"] - names = sorted(c["name"] for c in host) - assert names == ["__cf_bm", "_ga"] - - -def test_profile_ga_consent_yields_three_cookies(): - history = [{"name": "x.com", "cookie_profile": "ga_consent"}] - cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) - host = [c for c in cookies if c["domain"] == ".x.com"] - names = sorted(c["name"] for c in host) - # Always _ga + _gid + one of OneTrust|CookieYes - assert "_ga" in names and "_gid" in names - assert any(n in names for n in ("OptanonAlertBoxClosed", "cookieyes-consent")) - assert len(host) == 3 - - -def test_profile_ga_consent_clarity_yields_at_least_four_cookies(): - """Always _ga + _gid + _clck + consent banner. 
Optionally _fbp, _dc_gtm_*, - __hssrc (probabilistic per rng — see test_new_helper_cookies_*).""" - history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}] - cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) - host = [c for c in cookies if c["domain"] == ".x.com"] - names = sorted(c["name"] for c in host) - assert "_ga" in names and "_gid" in names and "_clck" in names - assert any(n in names for n in ("OptanonAlertBoxClosed", "cookieyes-consent")) - assert len(host) >= 4 # 4 baseline + 0-3 helpers - - -def test_unknown_profile_falls_back_to_ga(): - history = [{"name": "x.com", "cookie_profile": "nonexistent_profile"}] - cookies = build_cookies(seed=42, browsing_history=history, now=_FIXED_NOW) - host = [c for c in cookies if c["domain"] == ".x.com"] - assert [c["name"] for c in host] == ["_ga"] - - -# =========================================================================== -# 3. Determinism -# =========================================================================== - -def test_same_seed_and_history_same_content(): - a = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - b = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - assert a == b - - -def test_different_seed_different_content(): - a = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - b = build_cookies(seed=99, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - a_nid = next(c for c in a if c["name"] == "NID")["value"] - b_nid = next(c for c in b if c["name"] == "NID")["value"] - assert a_nid != b_nid - - -def test_history_order_does_not_affect_domain_specific_cookies(): - """Sub-seed is keyed on domain name, not order in history list.""" - h1 = [_SAMPLE_HISTORY[0], _SAMPLE_HISTORY[1]] - h2 = [_SAMPLE_HISTORY[1], _SAMPLE_HISTORY[0]] - a = {(c["domain"], c["name"]): c["value"] - for c in build_cookies(seed=42, browsing_history=h1, now=_FIXED_NOW) - if c["domain"] != ".google.com"} - b = 
{(c["domain"], c["name"]): c["value"] - for c in build_cookies(seed=42, browsing_history=h2, now=_FIXED_NOW) - if c["domain"] != ".google.com"} - assert a == b - - -def test_sub_seed_distinct_tags_distinct_streams(): - assert _sub_seed(42, "google") != _sub_seed(42, "dom:github.com") - assert _sub_seed(42, "dom:github.com") != _sub_seed(42, "dom:amazon.com") - assert _sub_seed(0, "any") != 0 # seed=0 still produces non-zero sub-seed - - -# =========================================================================== -# 4. Format / structural correctness for the Google batch -# =========================================================================== - -def test_nid_format(): - cookies = build_cookies(seed=42, now=_FIXED_NOW) - nid = next(c for c in cookies if c["name"] == "NID") - prefix, b64 = nid["value"].split("=", 1) - assert prefix.isdigit() and len(prefix) == 3 - # Broadened to 100-540 in realism round 2 to cover historical NID versions - assert 100 <= int(prefix) <= 540 - assert len(b64) == 178 - - -def test_consent_format(): - cookies = build_cookies(seed=42, now=_FIXED_NOW) - consent = next(c for c in cookies if c["name"] == "CONSENT") - assert consent["value"].startswith("YES+cb.") - assert "+FX+" in consent["value"] - - -# =========================================================================== -# 5. Chrome 400-day cookie cap compliance -# =========================================================================== - -def test_all_expiries_within_400_day_cap(): - """Chrome 104+ caps cookie expiry to 400 days. Cookies > 400d silently - truncated / dropped. 
We tighten everything to <=395d (except __cf_bm - which is short-lived telemetry).""" - cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - max_allowed = _FIXED_NOW + 400 * 86400 - for c in cookies: - # Short-lived telemetry cookies are fine - if c["name"] in ("__cf_bm", "1P_JAR", "_gid"): - continue - assert c["expires"] <= max_allowed, ( - f"Cookie {c['name']} expires {c['expires'] - _FIXED_NOW}s " - f"(> 400d cap) — would be silently dropped" - ) - - -# =========================================================================== -# 6. Playwright add_cookies field requirements -# =========================================================================== - -def test_all_cookies_have_required_playwright_fields(): - cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - for c in cookies: - assert c.get("name"), f"missing name: {c}" - assert c.get("value") is not None, f"missing value: {c}" - assert c.get("domain"), f"missing domain: {c}" - assert c.get("path") == "/", f"path != / for {c['name']}" - - -def test_modern_cookies_marked_secure(): - """Cookies with sameSite=None require secure=True under Firefox/Chrome. - Also generally needed for cookies set via Playwright add_cookies without - a navigation context.""" - cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - for c in cookies: - if c.get("sameSite") == "None": - assert c.get("secure") is True, f"{c['name']} None+!secure invalid" - - -def test_httponly_on_signed_cookies(): - cookies = build_cookies(seed=42, now=_FIXED_NOW) - nid = next(c for c in cookies if c["name"] == "NID") - enid = next(c for c in cookies if c["name"] == "ENID") - assert nid.get("httpOnly") is True - assert enid.get("httpOnly") is True - - -# =========================================================================== -# 7. 
End-to-end with real fpforge Profile -# =========================================================================== - -def test_with_real_fpforge_profile(): - """End-to-end: generate a real Profile, ensure browsing_history is populated - and build_cookies works against it.""" - from invisible_playwright._fpforge import generate_profile - prof = generate_profile(seed=42) - assert isinstance(prof.browsing_history, list) - # The Bayesian network samples ~15-30 sites per persona - assert 5 <= len(prof.browsing_history) <= 50, \ - f"unexpected history length: {len(prof.browsing_history)}" - # Each entry has the expected fields - for site in prof.browsing_history: - assert "name" in site and "category" in site and "cookie_profile" in site - # build_cookies works against the real profile - cookies = build_cookies(seed=prof.seed, browsing_history=prof.browsing_history, - now=_FIXED_NOW) - # 6 google + at least 1 cookie per visited site - assert len(cookies) >= 6 + len(prof.browsing_history) - - -def test_same_seed_same_browsing_history_via_fpforge(): - """Profile.browsing_history is deterministic from seed (Bayesian sampler).""" - from invisible_playwright._fpforge import generate_profile - a = generate_profile(seed=42).browsing_history - b = generate_profile(seed=42).browsing_history - assert a == b - - -# =========================================================================== -# 8. Realism improvements (2026-05-24 round 2) -# =========================================================================== - -def test_no_1p_jar_cookie(): - """1P_JAR was deprecated by Google in 2022. 
Including it is an - anachronism flag for fingerprinters that look at cookie freshness.""" - cookies = build_cookies(seed=42, browsing_history=_SAMPLE_HISTORY, now=_FIXED_NOW) - names = {c["name"] for c in cookies} - assert "1P_JAR" not in names - - -def test_nid_prefix_broadened_range(): - """NID 3-digit prefix should cover historical versions (137/105/511/525 - seen in real captures) — range 100-540, not just 500-540.""" - seen_prefixes = set() - for seed in range(200): - cookies = build_cookies(seed=seed, now=_FIXED_NOW) - nid = next(c for c in cookies if c["name"] == "NID") - prefix = int(nid["value"].split("=", 1)[0]) - seen_prefixes.add(prefix) - assert min(seen_prefixes) < 500, f"NID range never goes below 500 ({sorted(seen_prefixes)[:5]})" - assert max(seen_prefixes) <= 540 - - -def test_consent_lang_from_timezone_eu(): - """CONSENT cookie's `lang+region` token derived from IANA timezone.""" - cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Rome") - consent = next(c for c in cookies if c["name"] == "CONSENT") - assert ".it+IT+" in consent["value"], f"expected it+IT in: {consent['value']}" - - -def test_consent_lang_default_fx(): - """Unknown / US timezone → default `en+FX` (non-EU fallback).""" - cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="America/New_York") - consent = next(c for c in cookies if c["name"] == "CONSENT") - assert ".en+FX+" in consent["value"] - - -def test_consent_lang_de_for_berlin(): - cookies = build_cookies(seed=42, now=_FIXED_NOW, timezone="Europe/Berlin") - consent = next(c for c in cookies if c["name"] == "CONSENT") - assert ".de+DE+" in consent["value"] - - -def test_consent_lang_no_timezone_default(): - """timezone=None → default en+FX.""" - cookies = build_cookies(seed=42, now=_FIXED_NOW) - consent = next(c for c in cookies if c["name"] == "CONSENT") - assert ".en+FX+" in consent["value"] - - -def test_new_helper_cookies_appear_in_ga_consent_clarity(): - """ga_consent_clarity recipe should sometimes 
include _fbp, _dc_gtm_*, __hssrc - (probabilistic per rng). Check across many seeds that they appear.""" - saw_fbp = False - saw_gtm = False - saw_hssrc = False - history = [{"name": "site.com", "cookie_profile": "ga_consent_clarity"}] - for seed in range(100): - cookies = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW) - names = {c["name"] for c in cookies if c["domain"] == ".site.com"} - if "_fbp" in names: saw_fbp = True - if any(n.startswith("_dc_gtm_") for n in names): saw_gtm = True - if "__hssrc" in names: saw_hssrc = True - assert saw_fbp, "_fbp never appeared in 100 seeds (rng pick broken)" - assert saw_gtm, "_dc_gtm_* never appeared in 100 seeds" - assert saw_hssrc, "__hssrc never appeared in 100 seeds" - - -def test_fbp_format(): - """_fbp format: fb...""" - history = [{"name": "x.com", "cookie_profile": "ga_consent_clarity"}] - # Try multiple seeds until we hit a seed that includes _fbp (50% chance) - for seed in range(20): - cookies = build_cookies(seed=seed, browsing_history=history, now=_FIXED_NOW) - fbp = next((c for c in cookies if c["name"] == "_fbp"), None) - if fbp: - parts = fbp["value"].split(".") - assert parts[0] == "fb" - assert parts[1].isdigit() - assert parts[2].isdigit() and len(parts[2]) >= 13 # unix ms - assert parts[3].isdigit() - return - raise AssertionError("never got _fbp across 20 seeds — distribution broken") diff --git a/tests/test_version.py b/tests/test_version.py deleted file mode 100644 index 7702f7f..0000000 --- a/tests/test_version.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Regression tests for issue #24: CLI version reporting. - -Two distinct symptoms reported by `i43-j`: - 1. `python -m invisible_playwright --version` errored out (only the - `version` subcommand worked). - 2. `python -m invisible_playwright version` printed the literal string - "0.1.0" regardless of the installed version (a stale hardcoded - `__version__` in __init__.py that nobody had remembered to bump). 
- -These tests pin down both behaviours so the regressions don't sneak back -in via a future copy/paste. -""" -import io -import re -import subprocess -import sys -from contextlib import redirect_stdout - -import pytest - -import invisible_playwright -from invisible_playwright import __version__, cli - - -pytestmark = pytest.mark.unit - - -def test_version_matches_installed_package_metadata(): - """__version__ must come from importlib.metadata, not a hardcoded literal, - so it can never drift from the pyproject.toml `version` field.""" - from importlib.metadata import version as pkg_version - assert __version__ == pkg_version("invisible-playwright") - - -def test_version_is_not_the_stale_010_string(): - """Issue #24 regression: __version__ used to be hardcoded as '0.1.0' - and never updated. If this ever returns to a literal '0.1.0' the - package has been published or shipped with stale metadata.""" - assert __version__ != "0.1.0", ( - "__version__ is the stale hardcoded '0.1.0' string — issue #24 has " - "regressed. Use importlib.metadata to derive it from pyproject.toml." - ) - - -def test_version_subcommand_prints_real_version(): - """`invisible-playwright version` must print the actual installed version, - not the old hardcoded '0.1.0'.""" - buf = io.StringIO() - with redirect_stdout(buf): - rc = cli.main(["version"]) - assert rc == 0 - out = buf.getvalue() - assert f"invisible_playwright {__version__}" in out - assert "0.1.0" not in out or __version__ == "0.1.0" # safety: only allowed if truly 0.1.0 - assert "BINARY_VERSION=" in out - assert "Firefox " in out - - -def test_dash_dash_version_flag_works(): - """Issue #24 reporter: `python -m invisible_playwright --version` used to - error with 'the following arguments are required: cmd' because there was - no top-level --version flag, only the `version` subcommand. Now the - Python convention works too.""" - # argparse's --version action calls sys.exit(0) directly, so use subprocess. 
- r = subprocess.run( - [sys.executable, "-m", "invisible_playwright", "--version"], - capture_output=True, text=True, timeout=15, - ) - assert r.returncode == 0, f"--version returned {r.returncode}, stderr={r.stderr!r}" - # argparse may emit on stdout or stderr depending on version - combined = r.stdout + r.stderr - assert "invisible_playwright" in combined - assert __version__ in combined - - -def test_no_args_prints_help_not_traceback(): - """`python -m invisible_playwright` with no args should be graceful - (print help, exit non-zero) rather than crashing with a traceback.""" - r = subprocess.run( - [sys.executable, "-m", "invisible_playwright"], - capture_output=True, text=True, timeout=15, - ) - # Either prints help (rc=2) or shows usage. Must NOT contain a traceback. - assert "Traceback" not in (r.stdout + r.stderr) - assert "usage:" in (r.stdout + r.stderr).lower() - - -def test_dash_V_short_flag_works(): - """Alias `-V` for `--version` (Python convention).""" - r = subprocess.run( - [sys.executable, "-m", "invisible_playwright", "-V"], - capture_output=True, text=True, timeout=15, - ) - assert r.returncode == 0 - assert __version__ in (r.stdout + r.stderr) - - -def test_version_matches_semver_shape(): - """Sanity: version should look like a semver (digits.digits.digits) - or a PEP-440 dev marker, not a placeholder string.""" - assert re.match(r"^\d+\.\d+\.\d+", __version__), ( - f"__version__ {__version__!r} doesn't look like a real version" - ) diff --git a/tests/test_webrtc_realness.py b/tests/test_webrtc_realness.py deleted file mode 100644 index fec01c0..0000000 --- a/tests/test_webrtc_realness.py +++ /dev/null @@ -1,442 +0,0 @@ -"""WebRTC realness regression tests. - -Two layers, both runnable on GitHub CI: - -* **unit** (`@pytest.mark.unit`) — pure SDP/candidate assertions against golden - samples. No browser, no proxy, no network. 
These lock in every rule we found - on 2026-06-06: host must be mDNS ``.local``; the synthetic srflx must carry the - egress IP with a GENUINE nICEr priority (never ``local_pref == 0xFFFF``) and a - stable, distinct foundation; CreepJS's resolver must return the egress, and a - host-only SDP must read as "blocked". They run in the standard ``tests.yml``. - -* **e2e** (`@pytest.mark.e2e`) — launch the patched binary and verify the live - ICE gather. "Being behind a proxy" is faked WITHOUT smartproxy: - - the egress IP is injected via ``STEALTHFOX_WEBRTC_PUBLIC_IP`` (RFC 5737 - TEST-NET, so it never collides with a real IP); - - the "behind a TCP-only SOCKS proxy" condition is reproduced by a tiny - in-process SOCKS5 server that relays TCP CONNECT but refuses UDP ASSOCIATE - (exactly a residential TCP-only proxy → WebRTC's default-route UDP probe - fails → exercises the Fix C fallback). No credentials, no external proxy. - Excluded from the default run; a binary is located via ``STEALTHFOX_E2E_BINARY`` - (or the locally-built tree), else the test skips. -""" -from __future__ import annotations - -import os -import re -import select -import socket -import struct -import threading -from http.server import BaseHTTPRequestHandler, HTTPServer - -import pytest - -# ────────────────────────────────────────────────────────────────────────── -# Pure SDP / ICE-candidate helpers (no I/O) — the heart of the sentinels. -# ────────────────────────────────────────────────────────────────────────── -_CAND = re.compile( - r"candidate:(?P\S+)\s+(?P\d+)\s+(?PUDP|TCP|udp|tcp)\s+" - r"(?P\d+)\s+(?P
\S+)\s+(?P\d+)\s+typ\s+(?P\w+)" - r"(?:.*?raddr\s+(?P\S+)\s+rport\s+(?P\d+))?" -) - - -def parse_candidate(line): - """Parse one ``a=candidate:`` / ``candidate:`` line into a dict (or None).""" - m = _CAND.search(line) - if not m: - return None - d = m.groupdict() - d["component"] = int(d["component"]) - d["priority"] = int(d["priority"]) - d["port"] = int(d["port"]) - d["proto"] = d["proto"].upper() - if d["rport"] is not None: - d["rport"] = int(d["rport"]) - return d - - -def decode_priority(prio): - """Split a candidate priority into nICEr's fields (RFC 5245 layout that - nICEr emits: type<<24 | iface<<16 | dir<<13 | stun<<8 | (256-component)).""" - return { - "type_pref": (prio >> 24) & 0xFF, - "iface_pref": (prio >> 16) & 0xFF, - "local_pref": (prio >> 8) & 0xFFFF, - "direction": (prio >> 13) & 0x7, - "stun_priority": (prio >> 8) & 0x1F, - "component": 256 - (prio & 0xFF), - } - - -def is_mdns(addr): - return bool(addr) and str(addr).endswith(".local") - - -def candidates(sdp_or_lines): - if isinstance(sdp_or_lines, str): - lines = re.findall(r"(?:a=)?candidate:[^\r\n]*", sdp_or_lines) - else: - lines = list(sdp_or_lines) - return [c for c in (parse_candidate(l) for l in lines) if c] - - -def host_candidates(cands): - return [c for c in cands if c["typ"] == "host"] - - -def srflx_candidates(cands): - return [c for c in cands if c["typ"] == "srflx"] - - -def host_is_mdns(cands): - """Every host candidate must be a ``.local`` mDNS name, never a raw - LAN IP (the §9.4 leak form that fails BrowserLeaks).""" - hosts = host_candidates(cands) - return bool(hosts) and all(is_mdns(c["address"]) for c in hosts) - - -def srflx_realness(cand, expected_ip=None): - """Return (ok, reasons) for whether ``cand`` looks like a GENUINE nICEr UDP - server-reflexive candidate. 
Encodes the 2026-06-06 findings.""" - reasons = [] - if cand["typ"] != "srflx": - reasons.append("not a srflx candidate") - return False, reasons - if expected_ip is not None and cand["address"] != expected_ip: - reasons.append(f"address {cand['address']} != expected {expected_ip}") - p = decode_priority(cand["priority"]) - if p["type_pref"] != 100: - reasons.append(f"type_pref {p['type_pref']} != 100 (SRV_RFLX)") - if p["local_pref"] == 0xFFFF: - reasons.append("local_pref == 0xFFFF — impossible nICEr value (the old hardcoded tell)") - elif not (0x7000 <= p["local_pref"] < 0x8000): - reasons.append(f"local_pref {p['local_pref']} outside the genuine ~0x7E00-0x7FFF band") - if not (16 <= p["stun_priority"] <= 31): - reasons.append(f"stun_priority {p['stun_priority']} implausible (expect 31-server_id)") - if cand.get("raddr") not in (None, "0.0.0.0"): - reasons.append(f"raddr {cand['raddr']} not redacted to 0.0.0.0") - return (not reasons), reasons - - -def creep_get_ipaddress(sdp): - """Faithful port of CreepJS's getIPAddress(sdp): connection line first, then - the first candidate IP; '0.0.0.0' counts as blocked. Returns None if blocked - — i.e. exactly what makes CreepJS render 'stun connection: blocked'.""" - blocked = "0.0.0.0" - conn = (re.findall(r"c=IN\s.+\s", sdp) or [""])[0].strip().split(" ") - conn_ip = conn[2] if len(conn) > 2 else "" - if conn_ip and conn_ip != blocked: - return conn_ip - m = re.search(r"(udp|tcp)\s(?:\d|\w)+\s((?:\d|\w|\.|:)+)(?=\s)", sdp, re.I) - ip = m.group(2) if m else None - return ip if (ip and ip != blocked) else None - - -# ────────────────────────────────────────────────────────────────────────── -# Golden samples — real priority/foundation values, TEST-NET IPs (RFC 5737) -# so no real address is ever committed (feedback_pre_push_privacy_check). 
-# ────────────────────────────────────────────────────────────────────────── -HOST_MDNS = "candidate:0 1 UDP 2122252543 1460e928-16b3-4c66-80ad-04abcdef0000.local 54551 typ host" -HOST_RAW_IP = "candidate:0 1 UDP 2122252543 192.168.1.20 54551 typ host" # §9.4 leak form -VANILLA_SRFLX = "candidate:1 1 UDP 1685987327 203.0.113.50 3755 typ srflx raddr 0.0.0.0 rport 0" -OURS_SRFLX = "candidate:1 1 UDP 1686052863 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0" -# Pre-fix injection: local_pref hardcoded to 0xFFFF (priority 1694498815). The tell. -OLD_BAD_SRFLX = "candidate:2 1 UDP 1694498815 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0" - -SDP_GOOD = ( - "v=0\r\nc=IN IP4 0.0.0.0\r\n" - f"a={HOST_MDNS}\r\na={OURS_SRFLX}\r\n" -) -SDP_BLOCKED = "v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n" # host-only, no srflx - - -# ────────────────────────────────────────────────────────────────────────── -# UNIT sentinels (run on GitHub CI) -# ────────────────────────────────────────────────────────────────────────── -@pytest.mark.unit -def test_parse_and_decode_basics(): - c = parse_candidate(OURS_SRFLX) - assert c["typ"] == "srflx" and c["proto"] == "UDP" - assert c["address"] == "203.0.113.7" and c["raddr"] == "0.0.0.0" and c["rport"] == 0 - p = decode_priority(c["priority"]) - assert p["type_pref"] == 100 and p["stun_priority"] == 31 and p["component"] == 1 - - -@pytest.mark.unit -def test_genuine_srflx_passes(): - for line in (VANILLA_SRFLX, OURS_SRFLX): - ok, reasons = srflx_realness(parse_candidate(line), expected_ip=parse_candidate(line)["address"]) - assert ok, reasons - - -@pytest.mark.unit -def test_old_0xffff_srflx_is_rejected(): - """Fix A sentinel: local_pref == 0xFFFF must be flagged as fake.""" - ok, reasons = srflx_realness(parse_candidate(OLD_BAD_SRFLX)) - assert not ok - assert any("0xFFFF" in r for r in reasons), reasons - - -@pytest.mark.unit -def test_host_must_be_mdns_not_raw_ip(): - """§9.4 sentinel: raw-IP host candidate is a leak; .local is 
required.""" - assert host_is_mdns(candidates([HOST_MDNS])) is True - assert host_is_mdns(candidates([HOST_RAW_IP])) is False - - -@pytest.mark.unit -def test_srflx_foundation_distinct_from_host(): - """Fix B sentinel: srflx foundation must differ from the host foundations.""" - cands = candidates([HOST_MDNS, OURS_SRFLX]) - host_fnds = {c["foundation"] for c in host_candidates(cands)} - srflx_fnds = {c["foundation"] for c in srflx_candidates(cands)} - assert srflx_fnds and srflx_fnds.isdisjoint(host_fnds) - - -@pytest.mark.unit -def test_creep_resolver_returns_egress_when_srflx_present(): - assert creep_get_ipaddress(SDP_GOOD) == "203.0.113.7" - - -@pytest.mark.unit -def test_creep_resolver_reports_blocked_for_host_only(): - """The exact false-green we shipped: host-only (.local) SDP → no public IP - → CreepJS shows 'blocked'. The resolver must return None here.""" - assert creep_get_ipaddress(SDP_BLOCKED) is None - - -@pytest.mark.unit -def test_mdns_host_is_invisible_to_creep_resolver(): - """A .local host must NOT be mis-read as an IP (the hyphen in the UUID is - what makes CreepJS skip it and fall through to the srflx).""" - assert creep_get_ipaddress("v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n") is None - - -# ────────────────────────────────────────────────────────────────────────── -# Fake-proxy infrastructure for e2e: a tiny TCP-only SOCKS5 server. -# ────────────────────────────────────────────────────────────────────────── -class _Socks5TcpOnly: - """Minimal SOCKS5: no-auth, CONNECT (TCP) relayed, UDP ASSOCIATE refused. - - Reproduces a residential TCP-only proxy: pages load over TCP, but WebRTC's - UDP path is dead — which (for a no-camera page in default_address_only mode) - is exactly what made the default-route probe fail and ICE return zero - candidates before Fix C. 
- """ - - def __init__(self): - self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self._srv.bind(("127.0.0.1", 0)) - self._srv.listen(16) - self.port = self._srv.getsockname()[1] - self.udp_associate_attempts = 0 - self._stop = False - self._t = threading.Thread(target=self._serve, daemon=True) - self._t.start() - - def _serve(self): - while not self._stop: - try: - conn, _ = self._srv.accept() - except OSError: - break - threading.Thread(target=self._handle, args=(conn,), daemon=True).start() - - def _recv_exact(self, sock, n): - buf = b"" - while len(buf) < n: - chunk = sock.recv(n - len(buf)) - if not chunk: - return None - buf += chunk - return buf - - def _handle(self, conn): - try: - head = self._recv_exact(conn, 2) - if not head or head[0] != 0x05: - conn.close() - return - nmethods = head[1] - self._recv_exact(conn, nmethods) - conn.sendall(b"\x05\x00") # no-auth - req = self._recv_exact(conn, 4) - if not req: - conn.close() - return - ver, cmd, _, atyp = req - if atyp == 0x01: - addr = socket.inet_ntoa(self._recv_exact(conn, 4)) - elif atyp == 0x03: - ln = self._recv_exact(conn, 1)[0] - addr = self._recv_exact(conn, ln).decode("ascii", "ignore") - elif atyp == 0x04: - addr = socket.inet_ntop(socket.AF_INET6, self._recv_exact(conn, 16)) - else: - conn.close() - return - port = struct.unpack("!H", self._recv_exact(conn, 2))[0] - if cmd != 0x01: # not CONNECT (e.g. 
UDP ASSOCIATE) → refuse - self.udp_associate_attempts += 1 - conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00") # cmd not supported - conn.close() - return - try: - upstream = socket.create_connection((addr, port), timeout=15) - except OSError: - conn.sendall(b"\x05\x04\x00\x01\x00\x00\x00\x00\x00\x00") # host unreachable - conn.close() - return - conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00") # success - self._relay(conn, upstream) - except Exception: - try: - conn.close() - except Exception: - pass - - def _relay(self, a, b): - try: - while True: - r, _, _ = select.select([a, b], [], [], 30) - if not r: - break - for s in r: - data = s.recv(65536) - if not data: - return - (b if s is a else a).sendall(data) - finally: - for s in (a, b): - try: - s.close() - except Exception: - pass - - def close(self): - self._stop = True - try: - self._srv.close() - except Exception: - pass - - -# Same per-event probe CreepJS runs (kept tiny; raw string = one escape level). -_PROBE_JS = r"""async () => { - const pc = new RTCPeerConnection({iceCandidatePoolSize:1, iceServers:[{urls:[ - 'stun:stun4.l.google.com:19302','stun:stun3.l.google.com:19302']}]}); - pc.createDataChannel(''); - const cands = []; - pc.addEventListener('icecandidate', e => { if (e.candidate && e.candidate.candidate) cands.push(e.candidate.candidate); }); - await pc.setLocalDescription(await pc.createOffer({offerToReceiveAudio:1, offerToReceiveVideo:1})); - await new Promise(r => setTimeout(r, 3500)); - const sdp = (pc.localDescription && pc.localDescription.sdp) || ''; - try { pc.close(); } catch(e) {} - return { candidates: cands, sdp }; -}""" - -_FAKE_EGRESS = "203.0.113.7" # RFC 5737 TEST-NET-3 - - -def _e2e_binary(): - cand = os.environ.get("STEALTHFOX_E2E_BINARY") - if cand and os.path.exists(cand): - return cand - built = r"C:\ff\source\obj-x86_64-pc-windows-msvc\dist\bin\firefox.exe" - if os.path.exists(built): - return built - return None - - -@pytest.fixture -def socks5_tcp_only(): 
- srv = _Socks5TcpOnly() - yield srv - srv.close() - - -@pytest.fixture -def local_https_page(): - """A trivial localhost page (used by the no-proxy srflx test).""" - class H(BaseHTTPRequestHandler): - def do_GET(self): - self.send_response(200) - self.send_header("Content-Type", "text/html") - self.end_headers() - self.wfile.write(b"wrtc") - - def log_message(self, *a): - pass - - httpd = HTTPServer(("127.0.0.1", 0), H) - threading.Thread(target=httpd.serve_forever, daemon=True).start() - yield f"http://127.0.0.1:{httpd.server_address[1]}/" - httpd.shutdown() - - -def _launch(**extra): - from invisible_playwright import InvisiblePlaywright - - kw = {"headless": True, - # Fixed zone so the wrapper does NOT run timezone="auto" egress - # discovery through the (fake) proxy — irrelevant here, we inject the - # egress IP directly and want the launch deterministic/offline. - "timezone": "America/New_York", - "extra_prefs": {"media.peerconnection.ice.obfuscate_host_addresses": True}} - kw.update(extra) - return InvisiblePlaywright(**kw) - - -@pytest.mark.e2e -def test_srflx_is_real_and_resolvable(local_https_page): - """No proxy needed: the egress is faked via the env. 
Asserts the live srflx - is genuine (Fix A/B) and that CreepJS's resolver returns it (not blocked).""" - binary = _e2e_binary() - if not binary: - pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)") - os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _FAKE_EGRESS - os.environ["STEALTHFOX_WEBRTC_DISABLE_IPV6"] = "1" - with _launch(binary_path=binary) as browser: - page = browser.new_context().new_page() - page.goto(local_https_page, wait_until="domcontentloaded", timeout=60000) - res = page.evaluate(_PROBE_JS) - cands = candidates(res["candidates"]) - assert cands, "ICE produced ZERO candidates (blocked)" - assert host_is_mdns(cands), [c["address"] for c in host_candidates(cands)] - srflx = [c for c in srflx_candidates(cands) if c["address"] == _FAKE_EGRESS] - assert srflx, f"no synthetic srflx with {_FAKE_EGRESS}: {res['candidates']}" - ok, reasons = srflx_realness(srflx[0], expected_ip=_FAKE_EGRESS) - assert ok, reasons - # Two srflx for the same base must share ONE stable foundation (Fix B). - assert len({c["foundation"] for c in srflx}) == 1 - assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS - - -@pytest.mark.e2e -def test_not_blocked_behind_tcp_only_socks(socks5_tcp_only): - """Fix C sentinel: behind a TCP-only SOCKS proxy on a remote origin, ICE - must still complete (host .local + synthetic srflx), not return zero - candidates. Without Fix C this page is fully 'blocked'.""" - binary = _e2e_binary() - if not binary: - pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)") - os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _FAKE_EGRESS - os.environ["STEALTHFOX_WEBRTC_DISABLE_IPV6"] = "1" - proxy = {"server": f"socks5://127.0.0.1:{socks5_tcp_only.port}"} - try: - with _launch(binary_path=binary, proxy=proxy) as browser: - page = browser.new_context().new_page() - # remote origin loaded THROUGH the local SOCKS proxy (not localhost, - # so no proxy-bypass) → WebRTC proxy config active → Fix C path. 
- page.goto("https://example.com/", wait_until="domcontentloaded", timeout=70000) - res = page.evaluate(_PROBE_JS) - except Exception as exc: # network/proxy unavailable in this environment - pytest.skip(f"proxy/network path unavailable: {exc!r}") - cands = candidates(res["candidates"]) - assert cands, "behind SOCKS the gather returned ZERO candidates — Fix C regressed (blocked)" - assert host_is_mdns(cands) - assert any(c["address"] == _FAKE_EGRESS for c in srflx_candidates(cands)), res["candidates"] - assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS diff --git a/tests/unit/test_config_public.py b/tests/unit/test_config_public.py deleted file mode 100644 index 0e26e36..0000000 --- a/tests/unit/test_config_public.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Unit tests for the public ``config`` helpers.""" - -import pytest - -from invisible_playwright import ( - ensure_binary, - get_default_args, - get_default_stealth_prefs, -) -from invisible_playwright.config import get_default_stealth_prefs as _direct - - -pytestmark = pytest.mark.unit - - -def test_get_default_args_is_empty_list(): - """Currently no baseline CLI args, but must return a list (mutable, fresh each call).""" - args = get_default_args() - assert args == [] - assert isinstance(args, list) - args.append("--foo") - # next call must return a fresh empty list, not the mutated one - assert get_default_args() == [] - - -def test_get_default_stealth_prefs_random_seed_returns_dict(): - """No seed -> fresh random fingerprint, dict has expected stealth keys.""" - prefs = get_default_stealth_prefs() - assert isinstance(prefs, dict) - assert len(prefs) > 0 - # humanize toggle is always set explicitly - assert "invisible_playwright.humanize" in prefs - assert prefs["invisible_playwright.humanize"] is True - - -def test_get_default_stealth_prefs_seed_is_deterministic(): - """Same seed -> byte-identical prefs across calls.""" - a = get_default_stealth_prefs(seed=42) - b = get_default_stealth_prefs(seed=42) - assert 
a == b - - -def test_get_default_stealth_prefs_different_seeds_differ(): - """Different seeds -> different prefs.""" - a = get_default_stealth_prefs(seed=1) - b = get_default_stealth_prefs(seed=2) - assert a != b - - -def test_humanize_false_disables_prefs(): - """humanize=False removes the maxTime knob and flips the toggle to False.""" - prefs = get_default_stealth_prefs(seed=42, humanize=False) - assert prefs["invisible_playwright.humanize"] is False - assert "invisible_playwright.humanize.maxTime" not in prefs - - -def test_humanize_default_sets_max_time_1_5(): - """humanize=True -> default maxTime is 1.5s, stored as string.""" - prefs = get_default_stealth_prefs(seed=42, humanize=True) - assert prefs["invisible_playwright.humanize"] is True - assert prefs["invisible_playwright.humanize.maxTime"] == "1.5" - - -def test_humanize_float_overrides_max_time(): - """Float for humanize is the explicit cap in seconds.""" - prefs = get_default_stealth_prefs(seed=42, humanize=3.0) - assert prefs["invisible_playwright.humanize"] is True - assert prefs["invisible_playwright.humanize.maxTime"] == "3.0" - - -def test_extra_prefs_overlay_takes_precedence(): - """extra_prefs overlay LAST overrides any baseline value.""" - prefs = get_default_stealth_prefs( - seed=42, extra_prefs={"some.custom.pref": 999} - ) - assert prefs["some.custom.pref"] == 999 - - -def test_extra_prefs_can_override_baseline(): - """A key in extra_prefs that also exists in baseline gets overridden.""" - baseline = get_default_stealth_prefs(seed=42) - a_baseline_key = next(iter(baseline.keys())) - overridden = get_default_stealth_prefs( - seed=42, extra_prefs={a_baseline_key: "OVERRIDDEN_SENTINEL"} - ) - assert overridden[a_baseline_key] == "OVERRIDDEN_SENTINEL" - - -def test_locale_argument_changes_prefs(): - """Different locales produce different prefs (Accept-Language affected).""" - en = get_default_stealth_prefs(seed=42, locale="en-US") - it = get_default_stealth_prefs(seed=42, locale="it-IT") - assert 
en != it - - -def test_timezone_argument_changes_prefs(): - """Different timezones produce different prefs.""" - ny = get_default_stealth_prefs(seed=42, timezone="America/New_York") - rome = get_default_stealth_prefs(seed=42, timezone="Europe/Rome") - assert ny != rome - - -def test_pin_argument_forces_specific_fields(): - """Pin forces a specific field while the rest stays seed-derived.""" - plain = get_default_stealth_prefs(seed=42) - pinned = get_default_stealth_prefs( - seed=42, pin={"hardware.concurrency": 999} - ) - # something in the dict must differ vs the plain seed=42 build - assert plain != pinned - - -def test_public_import_matches_direct_import(): - """Top-level re-export and direct module import return identical output.""" - a = get_default_stealth_prefs(seed=42) - b = _direct(seed=42) - assert a == b - - -def test_ensure_binary_is_callable_via_public_namespace(): - """ensure_binary is re-exported and stays callable from the package root.""" - # We don't invoke it (would trigger a network download in CI) — just - # verify the public attribute is the same callable as the underlying. - from invisible_playwright.download import ensure_binary as _direct_eb - assert ensure_binary is _direct_eb