diff --git a/tests/test_fingerprint_consistency.py b/tests/test_fingerprint_consistency.py new file mode 100644 index 0000000..aa0f96b --- /dev/null +++ b/tests/test_fingerprint_consistency.py @@ -0,0 +1,547 @@ +"""Fingerprint consistency / lie-detection tests. + +Complementary to test_fingerprint_surface.py: those tests ask "do you +look like a real browser?" — these ask "are your fingerprint surfaces +INTERNALLY CONSISTENT?" + +Anti-bot systems catch spoofers not by checking each signal in +isolation but by cross-checking related signals. If you spoof UA to +"Windows" but leave navigator.platform as "Linux x86_64", or you spoof +WebGL renderer in the main thread but not in a Web Worker, the +inconsistency proves the spoof is fake. + +Sources studied (all FOSS, MIT-licensed): + - creepjs/src/lies/index.ts — the canonical lie detector + - creepjs/src/worker/index.ts — main-vs-worker scope cross-check + - creepjs/src/math/index.ts — Math.x(p) deterministic equality + - creepjs/src/navigator/index.ts — UA/platform/oscpu invariants + - niespodd/browser-fingerprinting README — worker hwConcurrency, + plugin chain, perf.timeOrigin + +Everything runs against `about:blank` with NO network and NO proxy. + +Run only this file: + pytest tests/test_fingerprint_consistency.py -m e2e -v +""" +from __future__ import annotations + +import sys + +import pytest + +from invisible_playwright import InvisiblePlaywright +from invisible_playwright.constants import BINARY_ENTRY_REL + + +PIN = { + "screen.width": 1920, + "screen.height": 1080, + "screen.avail_width": 1920, + "screen.avail_height": 1040, + "screen.dpr": 1.0, + "hardware.concurrency": 8, + "audio.sample_rate": 48000, + "audio.max_channel_count": 2, +} + + +@pytest.fixture(scope="session") +def firefox_binary(): + """See test_fingerprint_surface.firefox_binary for the lookup chain.""" + import os + env_path = os.environ.get("INVPW_BINARY_PATH") + if env_path: + from pathlib import Path + if Path(env_path).exists(): + return env_path + pytest.skip(f"INVPW_BINARY_PATH={env_path!r} does not exist") + if sys.platform not in BINARY_ENTRY_REL: + pytest.skip(f"unsupported platform: {sys.platform}") + from invisible_playwright.download import cache_dir_for_version + entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform] + if not entry.exists(): + pytest.skip( + "patched Firefox not cached; run " + "`python -m invisible_playwright fetch` first, or set " + "INVPW_BINARY_PATH to a local build" + ) + return str(entry) + + +@pytest.fixture(scope="module") +def page(firefox_binary): + with InvisiblePlaywright( + seed=42, + pin=PIN, + binary_path=firefox_binary, + headless=True, + ) as browser: + ctx = browser.new_context() + p = ctx.new_page() + p.goto("about:blank", timeout=30_000) + yield p + + +def _ev(page, expr): + return page.evaluate(expr) + + +# =========================================================================== +# 1. Math determinism — same input MUST yield same output +# Source: creepjs/src/math/index.ts +# A wrapper that adds noise to Math.* (canvas-spoofing prefs) exposes +# itself here: two consecutive calls with the same input must be +# byte-identical. +# =========================================================================== + + +@pytest.mark.e2e +@pytest.mark.parametrize("fn,arg", [ + ("cos", "1e308"), + ("acos", "0.5"), + ("asin", "0.5"), + ("atan", "Math.PI"), + ("atanh", "0.5"), + ("cbrt", "Math.PI"), + ("cosh", "Math.PI"), + ("exp", "Math.PI"), + ("expm1", "Math.PI"), + ("log", "Math.PI"), + ("log1p", "Math.PI"), + ("log10", "Math.PI"), + ("sin", "Math.PI"), + ("sinh", "Math.PI"), + ("sqrt", "Math.PI"), + ("tan", "Math.PI"), + ("tanh", "Math.PI"), +]) +def test_math_determinism(page, fn, arg): + """Math.() must return the same value across 100 calls.""" + first, last, all_equal = _ev(page, f"""() => {{ + const r = []; + for (let i = 0; i < 100; i++) r.push(Math.{fn}({arg})); + return [r[0], r[99], r.every(x => Object.is(x, r[0]))]; + }}""") + assert all_equal, ( + f"Math.{fn}({arg}) drifts across calls: first={first}, last={last}" + ) + + +@pytest.mark.e2e +def test_math_pow_two_arg_determinism(page): + ok = _ev(page, """() => { + const a = Math.pow(Math.PI, 2); + for (let i = 0; i < 50; i++) { + if (!Object.is(Math.pow(Math.PI, 2), a)) return false; + } + return true; + }""") + assert ok + + +# =========================================================================== +# 2. Worker scope vs main thread — navigator properties MUST agree +# Source: creepjs/src/worker/index.ts +# =========================================================================== + + +def _worker_navigator_dict(page, props): + expr = """async (props) => { + const code = ` + self.onmessage = (e) => { + const out = {}; + for (const p of e.data) { + try { out[p] = self.navigator[p]; } + catch (err) { out[p] = ''; } + } + if (out.languages && Array.isArray(out.languages)) { + out.languages = [...out.languages]; + } + self.postMessage(out); + }; + `; + const blob = new Blob([code], { type: 'application/javascript' }); + const url = URL.createObjectURL(blob); + const worker = new Worker(url); + try { + const result = await new Promise((resolve, reject) => { + worker.onmessage = (e) => resolve(e.data); + worker.onerror = (e) => reject(new Error(e.message)); + worker.postMessage(props); + setTimeout(() => reject(new Error('worker timeout')), 5000); + }); + return result; + } finally { + worker.terminate(); + URL.revokeObjectURL(url); + } + }""" + return page.evaluate(expr, list(props)) + + +@pytest.mark.e2e +def test_worker_userAgent_matches_main(page): + main = _ev(page, "navigator.userAgent") + worker = _worker_navigator_dict(page, ("userAgent",)) + assert worker["userAgent"] == main, ( + f"UA drift main vs worker:\n main: {main!r}\n worker: {worker['userAgent']!r}" + ) + + +@pytest.mark.e2e +def test_worker_hardwareConcurrency_matches_main(page): + main = _ev(page, "navigator.hardwareConcurrency") + worker = _worker_navigator_dict(page, ("hardwareConcurrency",)) + assert worker["hardwareConcurrency"] == main + + +@pytest.mark.e2e +def test_worker_language_matches_main(page): + main = _ev(page, "navigator.language") + worker = _worker_navigator_dict(page, ("language",)) + assert worker["language"] == main + + +@pytest.mark.e2e +def test_worker_languages_matches_main(page): + main = _ev(page, "[...navigator.languages]") + worker = _worker_navigator_dict(page, ("languages",)) + assert list(worker["languages"]) == list(main) + + +@pytest.mark.e2e +def test_worker_platform_matches_main(page): + main = _ev(page, "navigator.platform") + worker = _worker_navigator_dict(page, ("platform",)) + assert worker["platform"] == main + + +# =========================================================================== +# 3. Iframe scope vs window scope +# Source: creepjs/src/lies/index.ts (getBehemothIframe pattern) +# =========================================================================== + + +def _iframe_navigator_dict(page, props): + expr = """(props) => { + const iframe = document.createElement('iframe'); + iframe.style.display = 'none'; + document.body.appendChild(iframe); + const out = {}; + for (const p of props) { + try { out[p] = iframe.contentWindow.navigator[p]; } + catch (e) { out[p] = ''; } + } + if (Array.isArray(out.languages)) out.languages = [...out.languages]; + document.body.removeChild(iframe); + return out; + }""" + return page.evaluate(expr, list(props)) + + +@pytest.mark.e2e +def test_iframe_userAgent_matches_window(page): + main = _ev(page, "navigator.userAgent") + iframe = _iframe_navigator_dict(page, ("userAgent",)) + assert iframe["userAgent"] == main + + +@pytest.mark.e2e +def test_iframe_language_matches_window(page): + main = _ev(page, "navigator.language") + iframe = _iframe_navigator_dict(page, ("language",)) + assert iframe["language"] == main + + +@pytest.mark.e2e +def test_iframe_hardwareConcurrency_matches_window(page): + main = _ev(page, "navigator.hardwareConcurrency") + iframe = _iframe_navigator_dict(page, ("hardwareConcurrency",)) + assert iframe["hardwareConcurrency"] == main + + +@pytest.mark.e2e +def test_iframe_screen_matches_window(page): + main = _ev(page, "[screen.width, screen.height]") + iframe = _ev(page, """() => { + const f = document.createElement('iframe'); + f.style.display = 'none'; + document.body.appendChild(f); + const v = [f.contentWindow.screen.width, f.contentWindow.screen.height]; + document.body.removeChild(f); + return v; + }""") + assert iframe == main + + +# =========================================================================== +# 4. UA self-consistency (creepjs/src/navigator/index.ts) +# =========================================================================== + + +@pytest.mark.e2e +def test_navigator_platform_matches_userAgent_OS(page): + ua = _ev(page, "navigator.userAgent") + platform = _ev(page, "navigator.platform") + if "Windows" in ua: + assert "Win" in platform + elif "Mac" in ua: + assert "Mac" in platform + elif "Linux" in ua or "X11" in ua: + assert "Linux" in platform or "X11" in platform + + +@pytest.mark.e2e +def test_navigator_oscpu_matches_userAgent(page): + """Firefox-only: navigator.oscpu must correlate with UA OS.""" + ua = _ev(page, "navigator.userAgent") + oscpu = _ev(page, "navigator.oscpu || ''") + if not oscpu: + pytest.skip("navigator.oscpu not exposed") + if "Windows" in ua: + assert "Windows" in oscpu + elif "Linux" in ua: + assert "Linux" in oscpu + elif "Mac" in ua: + assert "Mac" in oscpu + + +@pytest.mark.e2e +def test_userAgent_contains_appVersion_chromium_only(page): + """Chromium invariant: UA contains appVersion. Firefox uses a short + appVersion form so the check is gated on `'chrome' in window`.""" + if not _ev(page, "'chrome' in window"): + pytest.skip("Chromium-only invariant") + ua = _ev(page, "navigator.userAgent") + av = _ev(page, "navigator.appVersion") + assert av in ua + + +# =========================================================================== +# 5. Native function self-toString (creepjs/src/lies/index.ts hasKnownToString) +# =========================================================================== + + +def _is_native_toString(text, fn_name): + """Mirror of CreepJS hasKnownToString — accept the engine-specific + native patterns (single-line on V8, multi-line on SpiderMonkey).""" + import re as _re + name = _re.escape(fn_name) + patterns = [ + rf"^function {name}\(\) \{{ \[native code\] \}}$", + rf"^function get {name}\(\) \{{ \[native code\] \}}$", + rf"^function {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$", + rf"^function get {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$", + ] + return any(_re.match(p, text) for p in patterns) + + +@pytest.mark.e2e +@pytest.mark.parametrize("native_fn,name", [ + ("Function.prototype.toString", "toString"), + ("Function.prototype.bind", "bind"), + ("Function.prototype.call", "call"), + ("Function.prototype.apply", "apply"), + ("Object.getOwnPropertyDescriptor", "getOwnPropertyDescriptor"), + ("Object.defineProperty", "defineProperty"), + ("Array.prototype.slice", "slice"), + ("JSON.stringify", "stringify"), +]) +def test_native_function_self_toString_matches(page, native_fn, name): + """Each native function's `.toString()` must match its engine's + native pattern. A Proxy wrapper or function-rewrite leaks here.""" + text = _ev(page, f"{native_fn}.toString()") + assert _is_native_toString(text, name), ( + f"{native_fn}.toString() not native-shape: {text!r}" + ) + + +# =========================================================================== +# 6. AudioContext / WebGL determinism +# =========================================================================== + + +@pytest.mark.e2e +def test_audio_offline_context_deterministic(page): + """OfflineAudioContext: same graph → byte-identical output.""" + ok = _ev(page, """async () => { + async function render() { + const ctx = new (window.OfflineAudioContext || + window.webkitOfflineAudioContext)(1, 5000, 44100); + const osc = ctx.createOscillator(); + osc.connect(ctx.destination); + osc.start(0); + const buf = await ctx.startRendering(); + return Array.from(buf.getChannelData(0).slice(0, 50)); + } + const a = await render(); + const b = await render(); + return JSON.stringify(a) === JSON.stringify(b); + }""") + assert ok + + +@pytest.mark.e2e +def test_webgl_getParameter_deterministic(page): + """WebGL parameters must not drift across reads.""" + ok = _ev(page, """() => { + const c = document.createElement('canvas'); + const gl = c.getContext('webgl'); + if (!gl) return false; + const params = [gl.MAX_TEXTURE_SIZE, gl.MAX_VIEWPORT_DIMS, + gl.MAX_RENDERBUFFER_SIZE, gl.MAX_VERTEX_ATTRIBS]; + const ref = JSON.stringify(params.map(p => gl.getParameter(p))); + for (let i = 0; i < 50; i++) { + if (JSON.stringify(params.map(p => gl.getParameter(p))) !== ref) { + return false; + } + } + return true; + }""") + assert ok + + +# =========================================================================== +# 7. Locale ↔ Intl cross-consistency +# =========================================================================== + + +@pytest.mark.e2e +def test_navigator_language_matches_Intl_locale(page): + """navigator.language base must agree with Intl.DateTimeFormat locale.""" + nav = _ev(page, "navigator.language").split("-")[0] + intl = _ev(page, + "Intl.DateTimeFormat().resolvedOptions().locale").split("-")[0] + assert nav == intl, ( + f"navigator.language base={nav!r} vs Intl={intl!r}" + ) + + +@pytest.mark.e2e +def test_navigator_language_matches_Intl_NumberFormat(page): + nav = _ev(page, "navigator.language").split("-")[0] + num = _ev(page, + "Intl.NumberFormat().resolvedOptions().locale").split("-")[0] + assert nav == num + + +@pytest.mark.e2e +def test_navigator_language_matches_Intl_Collator(page): + nav = _ev(page, "navigator.language").split("-")[0] + col = _ev(page, + "(new Intl.Collator()).resolvedOptions().locale").split("-")[0] + assert nav == col + + +# =========================================================================== +# 8. Property descriptor shape lies +# Spoofers using Object.defineProperty(navigator, prop, {value: ...}) +# leave a 'value' field on the descriptor — real native props use a getter. +# =========================================================================== + + +_DESCRIPTOR_NATIVE_PROPS = [ + "userAgent", "platform", "hardwareConcurrency", "language", "languages", + "vendor", "appVersion", "appName", "appCodeName", "doNotTrack", + "cookieEnabled", "onLine", "product", "productSub", "buildID", "oscpu", +] + + +@pytest.mark.e2e +@pytest.mark.parametrize("prop", _DESCRIPTOR_NATIVE_PROPS) +def test_navigator_property_descriptor_is_getter_not_value(page, prop): + """Each spoofable navigator.* property must be defined via a native + getter — NOT Object.defineProperty(..., {value: x}). The value-field + descriptor is the lazy spoof leak CreepJS catches.""" + has_lie = _ev(page, f"""() => {{ + let proto = navigator; + let descriptor = null; + while (proto && !descriptor) {{ + descriptor = Object.getOwnPropertyDescriptor(proto, {prop!r}); + proto = Object.getPrototypeOf(proto); + }} + if (!descriptor) return null; + return 'value' in descriptor; + }}""") + if has_lie is None: + pytest.skip(f"navigator.{prop} not exposed") + assert has_lie is False, ( + f"navigator.{prop} descriptor exposes 'value' field — lazy spoof" + ) + + +# =========================================================================== +# 9. performance.timeOrigin + monotonic +# =========================================================================== + + +@pytest.mark.e2e +def test_performance_timeOrigin_stable(page): + assert _ev(page, + "performance.timeOrigin === performance.timeOrigin") + + +@pytest.mark.e2e +def test_performance_now_monotonic(page): + ok = _ev(page, """() => { + let prev = performance.now(); + for (let i = 0; i < 100; i++) { + const cur = performance.now(); + if (cur < prev) return false; + prev = cur; + } + return true; + }""") + assert ok + + +# =========================================================================== +# 10. Window dimension invariants +# =========================================================================== + + +@pytest.mark.e2e +def test_window_inner_not_larger_than_outer(page): + inner, outer = _ev(page, "[window.innerWidth, window.outerWidth]") + assert inner <= outer + + +@pytest.mark.e2e +def test_screen_avail_not_larger_than_screen(page): + aw, w = _ev(page, "[screen.availWidth, screen.width]") + ah, h = _ev(page, "[screen.availHeight, screen.height]") + assert aw <= w and ah <= h + + +# =========================================================================== +# 11. Firefox UA invariants +# =========================================================================== + + +@pytest.mark.e2e +def test_firefox_UA_implies_empty_vendor(page): + """Firefox: navigator.vendor === ''""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + if "Chrome" in _ev(page, "navigator.userAgent"): + pytest.skip("Chrome+Firefox UA — likely synthetic") + assert _ev(page, "navigator.vendor") == "" + + +@pytest.mark.e2e +def test_firefox_appVersion_short_form(page): + """Real Firefox's appVersion is '5.0 (Windows)' form, not the full UA.""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + av = _ev(page, "navigator.appVersion") + ua = _ev(page, "navigator.userAgent") + assert av.startswith("5.0 (") + assert len(av) < len(ua) + + +@pytest.mark.e2e +def test_firefox_UA_implies_appName_Netscape(page): + """navigator.appName === 'Netscape' (historical invariant).""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + assert _ev(page, "navigator.appName") == "Netscape" diff --git a/tests/test_fingerprint_surface.py b/tests/test_fingerprint_surface.py new file mode 100644 index 0000000..cd30076 --- /dev/null +++ b/tests/test_fingerprint_surface.py @@ -0,0 +1,430 @@ +"""Fingerprint surface tests — replicate the checks performed by the canonical +anti-bot detection libraries against an OFFLINE browser session. + +Each test asserts the SAME thing the upstream detector would flag. A pass +here means our patched build appears human to that detector; a fail +means a real stealth hole that anti-bot kits would exploit in production. + +Detector libraries studied (all FOSS, MIT-licensed): + - github.com/fingerprintjs/BotD — 19 detectors, the most + widely deployed client-side + bot detector + - github.com/abrahamjuliot/creepjs — headless / stealth / lies + modules + - github.com/fingerprintjs/fingerprintjs — canvas / audio / color / + touch consistency + - github.com/antoinevastel/fpscanner — UA / platform / oscpu + cross-checks + - bot.sannysoft.com — classic Puppeteer harness + +Everything runs against `about:blank` with NO network and NO proxy. The +suite is intended to be part of the release-gate: pre-push hook runs +`pytest -m e2e` and these tests must be green on every release. + +Run only this file: + pytest tests/test_fingerprint_surface.py -m e2e -v +""" +from __future__ import annotations + +import re +import sys + +import pytest + +from invisible_playwright import InvisiblePlaywright +from invisible_playwright.constants import BINARY_ENTRY_REL + + +# ──────────────────────────────────────────────────────────────────── +# Inline PIN — a coherent mid-range Windows desktop. Not user-config: +# these specific values are what the surface tests assert against. +# Keep PIN small (only fields that JS exposes) and stable across runs. +# ──────────────────────────────────────────────────────────────────── + +PIN = { + "screen.width": 1920, + "screen.height": 1080, + "screen.avail_width": 1920, + "screen.avail_height": 1040, + "screen.dpr": 1.0, + "hardware.concurrency": 8, + "audio.sample_rate": 48000, + "audio.max_channel_count": 2, +} + + +@pytest.fixture(scope="session") +def firefox_binary(): + """Locate the patched Firefox binary. Three lookup paths: + 1. ``INVPW_BINARY_PATH`` env var (for dev iteration against a local build) + 2. Cached binary under ``cache_dir_for_version()`` (post-fetch) + 3. Skip cleanly (no implicit network download).""" + import os + env_path = os.environ.get("INVPW_BINARY_PATH") + if env_path: + from pathlib import Path + if Path(env_path).exists(): + return env_path + pytest.skip(f"INVPW_BINARY_PATH={env_path!r} does not exist") + if sys.platform not in BINARY_ENTRY_REL: + pytest.skip(f"unsupported platform: {sys.platform}") + from invisible_playwright.download import cache_dir_for_version + entry = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform] + if not entry.exists(): + pytest.skip( + "patched Firefox not cached; run " + "`python -m invisible_playwright fetch` first, or set " + "INVPW_BINARY_PATH to a local build" + ) + return str(entry) + + +@pytest.fixture(scope="module") +def page(firefox_binary): + """One headless browser shared across the whole module. + ~20s startup paid once, then every test runs in ~50ms.""" + with InvisiblePlaywright( + seed=42, + pin=PIN, + binary_path=firefox_binary, + headless=True, + ) as browser: + ctx = browser.new_context() + p = ctx.new_page() + p.goto("about:blank", timeout=30_000) + yield p + + +def _ev(page, expr): + return page.evaluate(expr) + + +# =========================================================================== +# BotD detectors (github.com/fingerprintjs/BotD/tree/main/src/detectors) +# Each detector becomes one pytest. The failure name maps to the BotKind +# constant BotD would emit on the wire. +# =========================================================================== + + +@pytest.mark.e2e +def test_botd_webdriver_property_is_falsey(page): + """BotD: navigator.webdriver === true → HeadlessChrome verdict.""" + assert not _ev(page, "navigator.webdriver"), ( + "navigator.webdriver is truthy — instant HeadlessChrome verdict" + ) + + +@pytest.mark.e2e +def test_botd_app_version_no_headless_token(page): + """BotD detectAppVersion: /headless|electron|slimerjs/i in appVersion.""" + av = _ev(page, "navigator.appVersion") + for token in ("headless", "electron", "slimerjs"): + assert not re.search(token, av, re.I), ( + f"navigator.appVersion contains {token!r}: {av!r}" + ) + + +@pytest.mark.e2e +def test_botd_user_agent_no_headless_or_selenium_token(page): + """BotD: /headless|selenium|phantom/i in UA.""" + ua = _ev(page, "navigator.userAgent") + for token in ("headless", "selenium", "phantom"): + assert not re.search(token, ua, re.I), ( + f"navigator.userAgent contains {token!r}: {ua!r}" + ) + + +@pytest.mark.e2e +def test_botd_function_bind_is_function(page): + """BotD detectFunctionBind: missing Function.prototype.bind = PhantomJS.""" + assert _ev(page, "typeof Function.prototype.bind === 'function'") + + +@pytest.mark.e2e +def test_botd_product_sub_is_gecko_value(page): + """BotD detectProductSub: Firefox must return '20100101'; '20030107' + on a Firefox UA = Chrome-stub leaked under spoof.""" + assert _ev(page, "navigator.productSub") == "20100101", ( + "navigator.productSub must be '20100101' on Firefox" + ) + + +@pytest.mark.e2e +def test_botd_no_process_global(page): + """BotD detectProcess: window.process indicates Electron.""" + assert not _ev(page, + "typeof window.process !== 'undefined' && " + "window.process.type === 'renderer'" + ) + assert not _ev(page, + "typeof window.process !== 'undefined' && " + "window.process.versions != null && " + "typeof window.process.versions.electron !== 'undefined'" + ) + + +@pytest.mark.e2e +def test_botd_eval_length_matches_engine(page): + """BotD detectEvalLengthInconsistency: `eval.toString().length` must be + 37 on Gecko (33 on Chromium, 39 on IE). Mismatch = engine spoof.""" + assert _ev(page, "eval.toString().length") == 37 + + +@pytest.mark.e2e +def test_botd_languages_array_non_empty(page): + """BotD detectLanguagesLengthInconsistency: empty navigator.languages + is the classic HeadlessChrome tell.""" + assert _ev(page, "navigator.languages.length") > 0 + + +@pytest.mark.e2e +def test_botd_plugins_instance_of_PluginArray(page): + """BotD detectPluginsArray: navigator.plugins must be a real PluginArray.""" + assert _ev(page, "navigator.plugins instanceof PluginArray") + + +@pytest.mark.e2e +def test_botd_mime_types_consistent_prototype_chain(page): + """BotD areMimeTypesConsistent: navigator.mimeTypes and each entry + must have proper prototype chain. Spoofers using plain arrays fail.""" + consistent = _ev(page, """() => { + if (typeof navigator.mimeTypes === 'undefined' || + typeof MimeTypeArray === 'undefined') return false; + let ok = Object.getPrototypeOf(navigator.mimeTypes) === MimeTypeArray.prototype; + for (let i = 0; i < navigator.mimeTypes.length; i++) { + ok = ok && Object.getPrototypeOf(navigator.mimeTypes[i]) === MimeType.prototype; + } + return ok; + }""") + assert consistent, "navigator.mimeTypes prototype chain inconsistent" + + +@pytest.mark.e2e +def test_botd_no_distinctive_window_props(page): + """BotD checkDistinctiveProperties: scan window for automation globals.""" + DISTINCTIVE = [ + "awesomium", "RunPerfTest", "CefSharp", "fmget_targets", "geb", + "__nightmare", "nightmare", "__phantomas", "callPhantom", "_phantom", + "wdioElectron", "__webdriverFunc", "_WEBDRIVER_ELEM_CACHE", + "ChromeDriverw", "domAutomation", "domAutomationController", + ] + leaks = [n for n in DISTINCTIVE + if _ev(page, f"typeof window[{n!r}] !== 'undefined'")] + assert not leaks, f"Distinctive bot globals leaked: {leaks}" + + +@pytest.mark.e2e +def test_botd_no_distinctive_document_props(page): + """BotD: document-side automation globals (webdriver/selenium/cdc).""" + DOC_LEAKS = [ + "__webdriver_evaluate", "__selenium_evaluate", + "__webdriver_script_function", "__webdriver_script_func", + "__webdriver_script_fn", "__fxdriver_evaluate", + "__driver_unwrapped", "__webdriver_unwrapped", + "__driver_evaluate", "__selenium_unwrapped", + "__fxdriver_unwrapped", + "$cdc_asdjflasutopfhvcZLmcf", "$cdc_asdjflasutopfhvcZLmcfl_", + "$chrome_asyncScriptInfo", "__$webdriverAsyncExecutor", + ] + leaks = [n for n in DOC_LEAKS + if _ev(page, f"typeof document[{n!r}] !== 'undefined'")] + assert not leaks, f"document carries automation property names: {leaks}" + + +@pytest.mark.e2e +def test_botd_document_html_attributes_clean(page): + """BotD detectDocumentAttributes: html element attrs contain 'selenium' + / 'webdriver' / 'driver' → Selenium verdict.""" + attrs = _ev(page, + "Array.from(document.documentElement.attributes).map(a => a.name + '=' + a.value)") + bad = [a for a in attrs if any(t in a.lower() + for t in ("selenium", "webdriver", "driver"))] + assert not bad, f"HTML attributes contain bot tokens: {bad}" + + +@pytest.mark.e2e +def test_botd_window_size_nonzero(page): + """BotD detectWindowSize: headless without window manager → 0x0.""" + ow = _ev(page, "window.outerWidth") + oh = _ev(page, "window.outerHeight") + assert ow > 0 and oh > 0, ( + f"outerWidth/Height = {ow}/{oh} — headless without window manager" + ) + + +@pytest.mark.e2e +def test_botd_webgl_debug_renderer_info_available(page): + """BotD detectWebGL: WEBGL_debug_renderer_info extension must exist.""" + has_ext = _ev(page, """() => { + const c = document.createElement('canvas'); + const gl = c.getContext('webgl') || c.getContext('experimental-webgl'); + return !!gl && !!gl.getExtension('WEBGL_debug_renderer_info'); + }""") + assert has_ext + + +# =========================================================================== +# sannysoft.com — classic Puppeteer detection harness +# =========================================================================== + + +@pytest.mark.e2e +def test_sannysoft_chrome_object_consistency(page): + """Firefox UA + window.chrome present = bot-framework leak.""" + if "Firefox" in _ev(page, "navigator.userAgent"): + assert not _ev(page, "typeof window.chrome !== 'undefined'") + + +@pytest.mark.e2e +def test_sannysoft_permissions_query_works(page): + """navigator.permissions.query() must return a proper PermissionStatus.""" + ok = _ev(page, """async () => { + if (!navigator.permissions || !navigator.permissions.query) return false; + try { + const r = await navigator.permissions.query({name: 'notifications'}); + return r && typeof r.state === 'string'; + } catch (e) { return false; } + }""") + assert ok + + +@pytest.mark.e2e +def test_sannysoft_iframe_chrome_not_leaked(page): + """iframe.contentWindow.chrome must not leak on Firefox UA.""" + if "Firefox" not in _ev(page, "navigator.userAgent"): + pytest.skip("Firefox-only invariant") + leaks = _ev(page, """() => { + const iframe = document.createElement('iframe'); + iframe.style.display = 'none'; + document.body.appendChild(iframe); + const is = typeof iframe.contentWindow.chrome !== 'undefined'; + document.body.removeChild(iframe); + return is; + }""") + assert not leaks + + +@pytest.mark.e2e +def test_sannysoft_iframe_languages_not_empty(page): + """Iframe-scope navigator.languages must have ≥1 entry.""" + n = _ev(page, """() => { + const f = document.createElement('iframe'); + f.style.display = 'none'; + document.body.appendChild(f); + const len = f.contentWindow.navigator.languages.length; + document.body.removeChild(f); + return len; + }""") + assert n > 0 + + +# =========================================================================== +# FingerprintJS — fingerprint surface coherence +# =========================================================================== + + +@pytest.mark.e2e +def test_fpjs_canvas_2d_context_returns_valid(page): + ok = _ev(page, """() => { + const c = document.createElement('canvas'); + c.width = 100; c.height = 100; + const ctx = c.getContext('2d'); + if (!ctx) return false; + ctx.fillText('test', 10, 10); + const data = c.toDataURL(); + return data.length > 100 && data.startsWith('data:image/png;base64'); + }""") + assert ok + + +@pytest.mark.e2e +def test_fpjs_audio_context_works(page): + ok = _ev(page, """async () => { + try { + const ctx = new (window.OfflineAudioContext || + window.webkitOfflineAudioContext)(1, 5000, 44100); + const osc = ctx.createOscillator(); + osc.connect(ctx.destination); + osc.start(0); + const buf = await ctx.startRendering(); + return buf && buf.length > 0; + } catch (e) { return false; } + }""") + assert ok + + +@pytest.mark.e2e +def test_fpjs_color_gamut_query_works(page): + """matchMedia('(color-gamut: ...)') must match at least srgb.""" + ok = _ev(page, """matchMedia('(color-gamut: srgb)').matches || + matchMedia('(color-gamut: p3)').matches || + matchMedia('(color-gamut: rec2020)').matches""") + assert ok + + +@pytest.mark.e2e +def test_fpjs_screen_color_depth_realistic(page): + """Atypical color depths are headless-distinctive.""" + cd = _ev(page, "screen.colorDepth") + assert cd in (24, 30, 32) + + +# =========================================================================== +# PIN-locked surfaces (the values declared in PIN above) +# =========================================================================== + + +@pytest.mark.e2e +def test_pin_screen_width_lands_in_screen_object(page): + assert _ev(page, "screen.width") == PIN["screen.width"] + + +@pytest.mark.e2e +def test_pin_screen_height_lands_in_screen_object(page): + assert _ev(page, "screen.height") == PIN["screen.height"] + + +@pytest.mark.e2e +def test_pin_hardware_concurrency_lands_in_navigator(page): + assert (_ev(page, "navigator.hardwareConcurrency") + == PIN["hardware.concurrency"]) + + +@pytest.mark.e2e +def test_pin_audio_sample_rate_lands_in_AudioContext(page): + assert _ev(page, + "(new (window.AudioContext||window.webkitAudioContext)()).sampleRate" + ) == PIN["audio.sample_rate"] + + +@pytest.mark.e2e +def test_pin_audio_max_channels_lands_in_destination(page): + assert _ev(page, + "(new (window.AudioContext||window.webkitAudioContext)())" + ".destination.maxChannelCount" + ) == PIN["audio.max_channel_count"] + + +# =========================================================================== +# fpscanner-style cross-checks +# =========================================================================== + + +@pytest.mark.e2e +def test_fpscanner_ua_vs_platform_consistent(page): + """UA OS substring must agree with navigator.platform OS substring.""" + ua = _ev(page, "navigator.userAgent") + platform = _ev(page, "navigator.platform") + if "Windows" in ua: + assert "Win" in platform, f"UA Win but platform={platform!r}" + elif "Mac" in ua: + assert "Mac" in platform + elif "Linux" in ua: + assert "Linux" in platform or "X11" in platform + + +@pytest.mark.e2e +def test_fpscanner_no_userAgentData_on_firefox(page): + """navigator.userAgentData is Chromium-only. Presence on Firefox UA = bot.""" + if "Firefox" in _ev(page, "navigator.userAgent"): + assert not _ev(page, "'userAgentData' in navigator")