test: fingerprint surface + consistency e2e tests

96 e2e tests reproducing the canonical anti-bot / fingerprinting
libraries' checks against a local InvisiblePlaywright session
on about:blank (no network).

Surface (30 tests):
  - BotD: webdriver, app_version, UA tokens, function_bind, productSub,
    process, eval.length, languages, plugins, mimeTypes, distinctive
    window/document globals, html attributes, window size, webgl_debug
  - sannysoft: chrome consistency, permissions.query, iframe chrome,
    iframe languages
  - FpJS: canvas 2D, audio offline, color-gamut, color-depth
  - PIN-locked: screen.width/height, hw.concurrency, audio.sampleRate,
    audio.maxChannelCount
  - fpscanner: UA<->platform, no userAgentData on Firefox

Consistency (66 tests):
  - Math determinism: 17 transcendentals + Math.pow
  - Worker scope vs main: 5 navigator props via Blob worker
  - Iframe scope vs window: 3 navigator props + screen
  - UA self-consistency: UA<->platform, UA<->oscpu, UA<->appVersion
  - Native function self-toString: 8 native APIs
  - AudioContext / WebGL determinism
  - Locale<->Intl: DateTimeFormat / NumberFormat / Collator
  - Descriptor shape lies: 16 navigator props (each must be a getter,
    not Object.defineProperty(value=...))
  - performance.timeOrigin + .now() monotonic
  - Window dimension invariants
  - Firefox UA invariants (vendor='', appName='Netscape',
    appVersion short form)

All marked @pytest.mark.e2e so they're excluded from the default
suite that the pre-push hook runs. Invoke explicitly:

    pytest -m e2e -v

Or against a local build:

    INVPW_BINARY_PATH=/path/to/firefox.exe pytest -m e2e -v

Sources: github.com/fingerprintjs/BotD, abrahamjuliot/creepjs,
fingerprintjs/fingerprintjs, antoinevastel/fpscanner,
niespodd/browser-fingerprinting, bot.sannysoft.com.

Verified: 95 passed, 1 skipped (Chromium-only invariant), 0 failed
against firefox-5 local build.
This commit is contained in:
feder-cr 2026-05-21 13:47:14 -07:00
parent 22b1171518
commit cf59e98fa9
2 changed files with 977 additions and 0 deletions

View file

@ -0,0 +1,547 @@
"""Fingerprint consistency / lie-detection tests.
Complementary to test_fingerprint_surface.py: those tests ask "do you
look like a real browser?" — these ask "are your fingerprint surfaces
INTERNALLY CONSISTENT?"
Anti-bot systems catch spoofers not by checking each signal in
isolation but by cross-checking related signals. If you spoof UA to
"Windows" but leave navigator.platform as "Linux x86_64", or you spoof
WebGL renderer in the main thread but not in a Web Worker, the
inconsistency proves the spoof is fake.
Sources studied (all FOSS, MIT-licensed):
- creepjs/src/lies/index.ts — the canonical lie detector
- creepjs/src/worker/index.ts — main-vs-worker scope cross-check
- creepjs/src/math/index.ts — Math.x(p) deterministic equality
- creepjs/src/navigator/index.ts — UA/platform/oscpu invariants
- niespodd/browser-fingerprinting README — worker hwConcurrency,
  plugin chain, perf.timeOrigin
Everything runs against `about:blank` with NO network and NO proxy.
Run only this file:
pytest tests/test_fingerprint_consistency.py -m e2e -v
"""
from __future__ import annotations
import sys
import pytest
from invisible_playwright import InvisiblePlaywright
from invisible_playwright.constants import BINARY_ENTRY_REL
# Fixed fingerprint values; the ``page`` fixture passes this dict to
# InvisiblePlaywright as ``pin=``.
PIN = {
    "screen.width": 1920,
    "screen.height": 1080,
    "screen.avail_width": 1920,
    "screen.avail_height": 1040,
    "screen.dpr": 1.0,
    "hardware.concurrency": 8,
    "audio.sample_rate": 48000,
    "audio.max_channel_count": 2,
}
@pytest.fixture(scope="session")
def firefox_binary():
    """Return the path of the patched Firefox executable, or skip.

    Lookup order: the ``INVPW_BINARY_PATH`` environment variable first, then
    the ``BINARY_ENTRY_REL`` entry for this platform under
    ``cache_dir_for_version()``. Every miss ends in ``pytest.skip``.
    """
    import os
    from pathlib import Path

    override = os.environ.get("INVPW_BINARY_PATH")
    if override:
        # An explicit override never falls back to the cached binary.
        if not Path(override).exists():
            pytest.skip(f"INVPW_BINARY_PATH={override!r} does not exist")
        return override

    if sys.platform not in BINARY_ENTRY_REL:
        pytest.skip(f"unsupported platform: {sys.platform}")

    from invisible_playwright.download import cache_dir_for_version

    cached = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
    if cached.exists():
        return str(cached)
    pytest.skip(
        "patched Firefox not cached; run "
        "`python -m invisible_playwright fetch` first, or set "
        "INVPW_BINARY_PATH to a local build"
    )
@pytest.fixture(scope="module")
def page(firefox_binary):
    """Yield one ``about:blank`` page from a headless session (module scope)."""
    launch_kwargs = {
        "seed": 42,
        "pin": PIN,
        "binary_path": firefox_binary,
        "headless": True,
    }
    with InvisiblePlaywright(**launch_kwargs) as browser:
        context = browser.new_context()
        blank_page = context.new_page()
        blank_page.goto("about:blank", timeout=30_000)
        yield blank_page
def _ev(page, expr):
return page.evaluate(expr)
# ===========================================================================
# 1. Math determinism — same input MUST yield same output
# Source: creepjs/src/math/index.ts
# A wrapper that adds noise to Math.* (canvas-spoofing prefs) exposes
# itself here: two consecutive calls with the same input must be
# byte-identical.
# ===========================================================================
@pytest.mark.e2e
@pytest.mark.parametrize("fn,arg", [
    ("cos", "1e308"),
    ("acos", "0.5"),
    ("asin", "0.5"),
    ("atan", "Math.PI"),
    ("atanh", "0.5"),
    ("cbrt", "Math.PI"),
    ("cosh", "Math.PI"),
    ("exp", "Math.PI"),
    ("expm1", "Math.PI"),
    ("log", "Math.PI"),
    ("log1p", "Math.PI"),
    ("log10", "Math.PI"),
    ("sin", "Math.PI"),
    ("sinh", "Math.PI"),
    ("sqrt", "Math.PI"),
    ("tan", "Math.PI"),
    ("tanh", "Math.PI"),
])
def test_math_determinism(page, fn, arg):
    """Call Math.<fn>(<arg>) 100 times in the page; every result must be
    ``Object.is``-identical to the first."""
    probe = f"""() => {{
const r = [];
for (let i = 0; i < 100; i++) r.push(Math.{fn}({arg}));
return [r[0], r[99], r.every(x => Object.is(x, r[0]))];
}}"""
    outcome = _ev(page, probe)
    # The script returns [first result, last result, all-identical flag].
    first, last, all_equal = outcome
    assert all_equal, (
        f"Math.{fn}({arg}) drifts across calls: first={first}, last={last}"
    )
@pytest.mark.e2e
def test_math_pow_two_arg_determinism(page):
    """Math.pow(Math.PI, 2) must give an identical result on 50 repeat calls."""
    probe = """() => {
const a = Math.pow(Math.PI, 2);
for (let i = 0; i < 50; i++) {
if (!Object.is(Math.pow(Math.PI, 2), a)) return false;
}
return true;
}"""
    stable = _ev(page, probe)
    assert stable
# ===========================================================================
# 2. Worker scope vs main thread — navigator properties MUST agree
# Source: creepjs/src/worker/index.ts
# ===========================================================================
def _worker_navigator_dict(page, props):
expr = """async (props) => {
const code = `
self.onmessage = (e) => {
const out = {};
for (const p of e.data) {
try { out[p] = self.navigator[p]; }
catch (err) { out[p] = '<error: ' + err.message + '>'; }
}
if (out.languages && Array.isArray(out.languages)) {
out.languages = [...out.languages];
}
self.postMessage(out);
};
`;
const blob = new Blob([code], { type: 'application/javascript' });
const url = URL.createObjectURL(blob);
const worker = new Worker(url);
try {
const result = await new Promise((resolve, reject) => {
worker.onmessage = (e) => resolve(e.data);
worker.onerror = (e) => reject(new Error(e.message));
worker.postMessage(props);
setTimeout(() => reject(new Error('worker timeout')), 5000);
});
return result;
} finally {
worker.terminate();
URL.revokeObjectURL(url);
}
}"""
return page.evaluate(expr, list(props))
@pytest.mark.e2e
def test_worker_userAgent_matches_main(page):
    """navigator.userAgent read inside a worker must equal the main-thread value."""
    main = _ev(page, "navigator.userAgent")
    worker = _worker_navigator_dict(page, ("userAgent",))
    in_worker = worker["userAgent"]
    assert in_worker == main, (
        f"UA drift main vs worker:\n main: {main!r}\n worker: {in_worker!r}"
    )
@pytest.mark.e2e
def test_worker_hardwareConcurrency_matches_main(page):
    """navigator.hardwareConcurrency must be the same in a worker and on main."""
    on_main = _ev(page, "navigator.hardwareConcurrency")
    in_worker = _worker_navigator_dict(page, ("hardwareConcurrency",))
    assert in_worker["hardwareConcurrency"] == on_main
@pytest.mark.e2e
def test_worker_language_matches_main(page):
    """navigator.language must be the same in a worker and on main."""
    on_main = _ev(page, "navigator.language")
    in_worker = _worker_navigator_dict(page, ("language",))
    assert in_worker["language"] == on_main
@pytest.mark.e2e
def test_worker_languages_matches_main(page):
    """navigator.languages must list the same entries in a worker and on main."""
    on_main = _ev(page, "[...navigator.languages]")
    in_worker = _worker_navigator_dict(page, ("languages",))
    # Compare as lists so the container type of either side does not matter.
    worker_langs = list(in_worker["languages"])
    main_langs = list(on_main)
    assert worker_langs == main_langs
@pytest.mark.e2e
def test_worker_platform_matches_main(page):
    """navigator.platform must be the same in a worker and on main."""
    on_main = _ev(page, "navigator.platform")
    in_worker = _worker_navigator_dict(page, ("platform",))
    assert in_worker["platform"] == on_main
# ===========================================================================
# 3. Iframe scope vs window scope
# Source: creepjs/src/lies/index.ts (getBehemothIframe pattern)
# ===========================================================================
def _iframe_navigator_dict(page, props):
expr = """(props) => {
const iframe = document.createElement('iframe');
iframe.style.display = 'none';
document.body.appendChild(iframe);
const out = {};
for (const p of props) {
try { out[p] = iframe.contentWindow.navigator[p]; }
catch (e) { out[p] = '<error: ' + e.message + '>'; }
}
if (Array.isArray(out.languages)) out.languages = [...out.languages];
document.body.removeChild(iframe);
return out;
}"""
return page.evaluate(expr, list(props))
@pytest.mark.e2e
def test_iframe_userAgent_matches_window(page):
    """navigator.userAgent in a fresh iframe must equal the top-window value."""
    top_level = _ev(page, "navigator.userAgent")
    framed = _iframe_navigator_dict(page, ("userAgent",))
    assert framed["userAgent"] == top_level
@pytest.mark.e2e
def test_iframe_language_matches_window(page):
    """navigator.language in a fresh iframe must equal the top-window value."""
    top_level = _ev(page, "navigator.language")
    framed = _iframe_navigator_dict(page, ("language",))
    assert framed["language"] == top_level
@pytest.mark.e2e
def test_iframe_hardwareConcurrency_matches_window(page):
    """navigator.hardwareConcurrency in a fresh iframe must equal the
    top-window value."""
    top_level = _ev(page, "navigator.hardwareConcurrency")
    framed = _iframe_navigator_dict(page, ("hardwareConcurrency",))
    assert framed["hardwareConcurrency"] == top_level
@pytest.mark.e2e
def test_iframe_screen_matches_window(page):
    """screen.width/height seen from a fresh hidden iframe must equal the
    top-window values."""
    top_level = _ev(page, "[screen.width, screen.height]")
    iframe_probe = """() => {
const f = document.createElement('iframe');
f.style.display = 'none';
document.body.appendChild(f);
const v = [f.contentWindow.screen.width, f.contentWindow.screen.height];
document.body.removeChild(f);
return v;
}"""
    framed = _ev(page, iframe_probe)
    assert framed == top_level
# ===========================================================================
# 4. UA self-consistency (creepjs/src/navigator/index.ts)
# ===========================================================================
@pytest.mark.e2e
def test_navigator_platform_matches_userAgent_OS(page):
    """The OS named in the UA string must also show up in navigator.platform."""
    ua = _ev(page, "navigator.userAgent")
    platform = _ev(page, "navigator.platform")
    # Pick the platform tokens acceptable for the OS the UA advertises.
    if "Windows" in ua:
        accepted = ("Win",)
    elif "Mac" in ua:
        accepted = ("Mac",)
    elif "Linux" in ua or "X11" in ua:
        accepted = ("Linux", "X11")
    else:
        # The UA names none of the OSes handled here; nothing to compare.
        return
    assert any(token in platform for token in accepted)
@pytest.mark.e2e
def test_navigator_oscpu_matches_userAgent(page):
    """navigator.oscpu must name the same OS as the UA string.

    Skipped when ``navigator.oscpu`` is empty or absent.
    """
    ua = _ev(page, "navigator.userAgent")
    oscpu = _ev(page, "navigator.oscpu || ''")
    if not oscpu:
        pytest.skip("navigator.oscpu not exposed")
    # First OS keyword found in the UA (in this order) decides the check.
    for os_name in ("Windows", "Linux", "Mac"):
        if os_name in ua:
            assert os_name in oscpu
            break
@pytest.mark.e2e
def test_userAgent_contains_appVersion_chromium_only(page):
    """When ``'chrome' in window`` holds, navigator.appVersion must be a
    substring of navigator.userAgent; otherwise the test is skipped."""
    has_chrome_global = _ev(page, "'chrome' in window")
    if not has_chrome_global:
        pytest.skip("Chromium-only invariant")
    user_agent = _ev(page, "navigator.userAgent")
    app_version = _ev(page, "navigator.appVersion")
    assert app_version in user_agent
# ===========================================================================
# 5. Native function self-toString (creepjs/src/lies/index.ts hasKnownToString)
# ===========================================================================
def _is_native_toString(text, fn_name):
"""Mirror of CreepJS hasKnownToString — accept the engine-specific
native patterns (single-line on V8, multi-line on SpiderMonkey)."""
import re as _re
name = _re.escape(fn_name)
patterns = [
rf"^function {name}\(\) \{{ \[native code\] \}}$",
rf"^function get {name}\(\) \{{ \[native code\] \}}$",
rf"^function {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$",
rf"^function get {name}\(\) \{{[\s\S]*\[native code\][\s\S]*\}}$",
]
return any(_re.match(p, text) for p in patterns)
@pytest.mark.e2e
@pytest.mark.parametrize("native_fn,name", [
    ("Function.prototype.toString", "toString"),
    ("Function.prototype.bind", "bind"),
    ("Function.prototype.call", "call"),
    ("Function.prototype.apply", "apply"),
    ("Object.getOwnPropertyDescriptor", "getOwnPropertyDescriptor"),
    ("Object.defineProperty", "defineProperty"),
    ("Array.prototype.slice", "slice"),
    ("JSON.stringify", "stringify"),
])
def test_native_function_self_toString_matches(page, native_fn, name):
    """``<native_fn>.toString()`` must have the native-code shape checked by
    ``_is_native_toString`` for the function's own name."""
    rendered = _ev(page, f"{native_fn}.toString()")
    looks_native = _is_native_toString(rendered, name)
    assert looks_native, (
        f"{native_fn}.toString() not native-shape: {rendered!r}"
    )
# ===========================================================================
# 6. AudioContext / WebGL determinism
# ===========================================================================
@pytest.mark.e2e
def test_audio_offline_context_deterministic(page):
    """Rendering the same oscillator graph twice in an OfflineAudioContext
    must give identical first 50 samples."""
    render_twice = """async () => {
async function render() {
const ctx = new (window.OfflineAudioContext ||
window.webkitOfflineAudioContext)(1, 5000, 44100);
const osc = ctx.createOscillator();
osc.connect(ctx.destination);
osc.start(0);
const buf = await ctx.startRendering();
return Array.from(buf.getChannelData(0).slice(0, 50));
}
const a = await render();
const b = await render();
return JSON.stringify(a) === JSON.stringify(b);
}"""
    identical = _ev(page, render_twice)
    assert identical
@pytest.mark.e2e
def test_webgl_getParameter_deterministic(page):
    """Four WebGL limits read via getParameter must stay identical over 50
    re-reads. A missing WebGL context also fails the test."""
    reread_probe = """() => {
const c = document.createElement('canvas');
const gl = c.getContext('webgl');
if (!gl) return false;
const params = [gl.MAX_TEXTURE_SIZE, gl.MAX_VIEWPORT_DIMS,
gl.MAX_RENDERBUFFER_SIZE, gl.MAX_VERTEX_ATTRIBS];
const ref = JSON.stringify(params.map(p => gl.getParameter(p)));
for (let i = 0; i < 50; i++) {
if (JSON.stringify(params.map(p => gl.getParameter(p))) !== ref) {
return false;
}
}
return true;
}"""
    stable = _ev(page, reread_probe)
    assert stable
# ===========================================================================
# 7. Locale ↔ Intl cross-consistency
# ===========================================================================
@pytest.mark.e2e
def test_navigator_language_matches_Intl_locale(page):
    """The base subtag of navigator.language must equal the base subtag of
    the locale Intl.DateTimeFormat resolves to."""
    nav_tag = _ev(page, "navigator.language")
    intl_tag = _ev(page, "Intl.DateTimeFormat().resolvedOptions().locale")
    # Compare only what precedes the first '-' (e.g. "en" of "en-US").
    nav = nav_tag.split("-")[0]
    intl = intl_tag.split("-")[0]
    assert nav == intl, (
        f"navigator.language base={nav!r} vs Intl={intl!r}"
    )
@pytest.mark.e2e
def test_navigator_language_matches_Intl_NumberFormat(page):
    """The base subtag of navigator.language must equal that of the locale
    Intl.NumberFormat resolves to."""
    nav_tag = _ev(page, "navigator.language")
    number_tag = _ev(page, "Intl.NumberFormat().resolvedOptions().locale")
    assert nav_tag.split("-")[0] == number_tag.split("-")[0]
@pytest.mark.e2e
def test_navigator_language_matches_Intl_Collator(page):
    """The base subtag of navigator.language must equal that of the locale
    Intl.Collator resolves to."""
    nav_tag = _ev(page, "navigator.language")
    collator_tag = _ev(page, "(new Intl.Collator()).resolvedOptions().locale")
    assert nav_tag.split("-")[0] == collator_tag.split("-")[0]
# ===========================================================================
# 8. Property descriptor shape lies
# Spoofers using Object.defineProperty(navigator, prop, {value: ...})
# leave a 'value' field on the descriptor — real native props use a getter.
# ===========================================================================
# navigator property names fed, one per test case, to
# test_navigator_property_descriptor_is_getter_not_value.
_DESCRIPTOR_NATIVE_PROPS = [
    "userAgent", "platform", "hardwareConcurrency", "language", "languages",
    "vendor", "appVersion", "appName", "appCodeName", "doNotTrack",
    "cookieEnabled", "onLine", "product", "productSub", "buildID", "oscpu",
]
@pytest.mark.e2e
@pytest.mark.parametrize("prop", _DESCRIPTOR_NATIVE_PROPS)
def test_navigator_property_descriptor_is_getter_not_value(page, prop):
    """The descriptor that defines navigator.<prop> must not carry a
    ``value`` field.

    The in-page script walks from ``navigator`` up its prototype chain to the
    first own descriptor for ``prop``. The test is skipped when none exists.
    """
    descriptor_probe = f"""() => {{
let proto = navigator;
let descriptor = null;
while (proto && !descriptor) {{
descriptor = Object.getOwnPropertyDescriptor(proto, {prop!r});
proto = Object.getPrototypeOf(proto);
}}
if (!descriptor) return null;
return 'value' in descriptor;
}}"""
    has_value_field = _ev(page, descriptor_probe)
    if has_value_field is None:
        pytest.skip(f"navigator.{prop} not exposed")
    assert has_value_field is False, (
        f"navigator.{prop} descriptor exposes 'value' field — lazy spoof"
    )
# ===========================================================================
# 9. performance.timeOrigin + monotonic
# ===========================================================================
@pytest.mark.e2e
def test_performance_timeOrigin_stable(page):
    """Two reads of performance.timeOrigin in one expression must be equal."""
    same_on_reread = _ev(
        page, "performance.timeOrigin === performance.timeOrigin")
    assert same_on_reread
@pytest.mark.e2e
def test_performance_now_monotonic(page):
    """performance.now() must never go backwards over 100 consecutive reads."""
    monotonic_probe = """() => {
let prev = performance.now();
for (let i = 0; i < 100; i++) {
const cur = performance.now();
if (cur < prev) return false;
prev = cur;
}
return true;
}"""
    never_decreased = _ev(page, monotonic_probe)
    assert never_decreased
# ===========================================================================
# 10. Window dimension invariants
# ===========================================================================
@pytest.mark.e2e
def test_window_inner_not_larger_than_outer(page):
    """window.innerWidth must not exceed window.outerWidth."""
    widths = _ev(page, "[window.innerWidth, window.outerWidth]")
    inner_width, outer_width = widths
    assert inner_width <= outer_width
@pytest.mark.e2e
def test_screen_avail_not_larger_than_screen(page):
    """screen.availWidth/availHeight must not exceed screen.width/height."""
    avail_w, full_w = _ev(page, "[screen.availWidth, screen.width]")
    avail_h, full_h = _ev(page, "[screen.availHeight, screen.height]")
    fits_horizontally = avail_w <= full_w
    assert fits_horizontally and avail_h <= full_h
# ===========================================================================
# 11. Firefox UA invariants
# ===========================================================================
@pytest.mark.e2e
def test_firefox_UA_implies_empty_vendor(page):
    """With a Firefox UA, navigator.vendor must be the empty string.

    Skipped when the UA lacks "Firefox", or contains "Chrome" as well.
    """
    ua_expr = "navigator.userAgent"
    if "Firefox" not in _ev(page, ua_expr):
        pytest.skip("Firefox-only invariant")
    if "Chrome" in _ev(page, ua_expr):
        pytest.skip("Chrome+Firefox UA — likely synthetic")
    vendor = _ev(page, "navigator.vendor")
    assert vendor == ""
@pytest.mark.e2e
def test_firefox_appVersion_short_form(page):
    """With a Firefox UA, navigator.appVersion must start with ``5.0 (`` and
    be shorter than the full UA string."""
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    app_version = _ev(page, "navigator.appVersion")
    user_agent = _ev(page, "navigator.userAgent")
    assert app_version.startswith("5.0 (")
    assert len(app_version) < len(user_agent)
@pytest.mark.e2e
def test_firefox_UA_implies_appName_Netscape(page):
    """With a Firefox UA, navigator.appName must be ``'Netscape'``."""
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    app_name = _ev(page, "navigator.appName")
    assert app_name == "Netscape"

View file

@ -0,0 +1,430 @@
"""Fingerprint surface tests — replicate the checks performed by the canonical
anti-bot detection libraries against an OFFLINE browser session.
Each test asserts the SAME thing the upstream detector would flag. A pass
here means our patched build appears human to that detector; a fail
means a real stealth hole that anti-bot kits would exploit in production.
Detector libraries studied (all FOSS, MIT-licensed):
- github.com/fingerprintjs/BotD — 19 detectors, the most widely deployed
  client-side bot detector
- github.com/abrahamjuliot/creepjs — headless / stealth / lies modules
- github.com/fingerprintjs/fingerprintjs — canvas / audio / color / touch
  consistency
- github.com/antoinevastel/fpscanner — UA / platform / oscpu cross-checks
- bot.sannysoft.com — classic Puppeteer harness
Everything runs against `about:blank` with NO network and NO proxy. The
suite is intended to be part of the release-gate: pre-push hook runs
`pytest -m e2e` and these tests must be green on every release.
Run only this file:
pytest tests/test_fingerprint_surface.py -m e2e -v
"""
from __future__ import annotations
import re
import sys
import pytest
from invisible_playwright import InvisiblePlaywright
from invisible_playwright.constants import BINARY_ENTRY_REL
# ────────────────────────────────────────────────────────────────────
# Inline PIN — a coherent mid-range Windows desktop. Not user-config:
# these specific values are what the surface tests assert against.
# Keep PIN small (only fields that JS exposes) and stable across runs.
# ────────────────────────────────────────────────────────────────────
PIN = {
    "screen.width": 1920,            # asserted against screen.width
    "screen.height": 1080,           # asserted against screen.height
    "screen.avail_width": 1920,
    "screen.avail_height": 1040,
    "screen.dpr": 1.0,
    "hardware.concurrency": 8,       # asserted against navigator.hardwareConcurrency
    "audio.sample_rate": 48000,      # asserted against AudioContext.sampleRate
    "audio.max_channel_count": 2,    # asserted against destination.maxChannelCount
}
@pytest.fixture(scope="session")
def firefox_binary():
    """Find the patched Firefox binary without touching the network.

    1. ``INVPW_BINARY_PATH`` env var, if set: used when the path exists,
       otherwise the fixture skips (no fallback to the cache).
    2. The ``BINARY_ENTRY_REL`` entry for this platform under
       ``cache_dir_for_version()``.
    3. ``pytest.skip`` when the platform is unsupported or nothing is cached.
    """
    import os
    from pathlib import Path

    configured = os.environ.get("INVPW_BINARY_PATH")
    if configured:
        if not Path(configured).exists():
            pytest.skip(f"INVPW_BINARY_PATH={configured!r} does not exist")
        return configured

    if sys.platform not in BINARY_ENTRY_REL:
        pytest.skip(f"unsupported platform: {sys.platform}")

    from invisible_playwright.download import cache_dir_for_version

    candidate = cache_dir_for_version() / BINARY_ENTRY_REL[sys.platform]
    if candidate.exists():
        return str(candidate)
    pytest.skip(
        "patched Firefox not cached; run "
        "`python -m invisible_playwright fetch` first, or set "
        "INVPW_BINARY_PATH to a local build"
    )
@pytest.fixture(scope="module")
def page(firefox_binary):
    """Launch one headless session for the whole module and yield a page
    already navigated to ``about:blank``."""
    session = InvisiblePlaywright(
        seed=42,
        pin=PIN,
        binary_path=firefox_binary,
        headless=True,
    )
    with session as browser:
        context = browser.new_context()
        blank_page = context.new_page()
        blank_page.goto("about:blank", timeout=30_000)
        yield blank_page
def _ev(page, expr):
return page.evaluate(expr)
# ===========================================================================
# BotD detectors (github.com/fingerprintjs/BotD/tree/main/src/detectors)
# Each detector becomes one pytest. The failure name maps to the BotKind
# constant BotD would emit on the wire.
# ===========================================================================
@pytest.mark.e2e
def test_botd_webdriver_property_is_falsey(page):
    """BotD: a truthy navigator.webdriver yields the HeadlessChrome verdict."""
    webdriver_flag = _ev(page, "navigator.webdriver")
    assert not webdriver_flag, (
        "navigator.webdriver is truthy — instant HeadlessChrome verdict"
    )
@pytest.mark.e2e
def test_botd_app_version_no_headless_token(page):
    """BotD detectAppVersion: navigator.appVersion must not contain
    headless / electron / slimerjs (case-insensitive)."""
    av = _ev(page, "navigator.appVersion")
    for token in ("headless", "electron", "slimerjs"):
        hit = re.search(token, av, flags=re.IGNORECASE)
        assert hit is None, (
            f"navigator.appVersion contains {token!r}: {av!r}"
        )
@pytest.mark.e2e
def test_botd_user_agent_no_headless_or_selenium_token(page):
    """BotD: navigator.userAgent must not contain headless / selenium /
    phantom (case-insensitive)."""
    ua = _ev(page, "navigator.userAgent")
    for token in ("headless", "selenium", "phantom"):
        hit = re.search(token, ua, flags=re.IGNORECASE)
        assert hit is None, (
            f"navigator.userAgent contains {token!r}: {ua!r}"
        )
@pytest.mark.e2e
def test_botd_function_bind_is_function(page):
    """BotD detectFunctionBind: Function.prototype.bind must be a function
    (its absence is the PhantomJS tell)."""
    bind_present = _ev(page, "typeof Function.prototype.bind === 'function'")
    assert bind_present
@pytest.mark.e2e
def test_botd_product_sub_is_gecko_value(page):
    """BotD detectProductSub: navigator.productSub must be '20100101', the
    Firefox value; '20030107' under a Firefox UA is a leaked Chrome stub."""
    product_sub = _ev(page, "navigator.productSub")
    assert product_sub == "20100101", (
        "navigator.productSub must be '20100101' on Firefox"
    )
@pytest.mark.e2e
def test_botd_no_process_global(page):
    """BotD detectProcess: window.process must not look like Electron's —
    neither ``type === 'renderer'`` nor a ``versions.electron`` entry."""
    renderer_check = (
        "typeof window.process !== 'undefined' && "
        "window.process.type === 'renderer'"
    )
    electron_version_check = (
        "typeof window.process !== 'undefined' && "
        "window.process.versions != null && "
        "typeof window.process.versions.electron !== 'undefined'"
    )
    assert not _ev(page, renderer_check)
    assert not _ev(page, electron_version_check)
@pytest.mark.e2e
def test_botd_eval_length_matches_engine(page):
    """BotD detectEvalLengthInconsistency: ``eval.toString().length`` must
    be 37, the Gecko value (33 on Chromium, 39 on IE)."""
    eval_source_length = _ev(page, "eval.toString().length")
    assert eval_source_length == 37
@pytest.mark.e2e
def test_botd_languages_array_non_empty(page):
    """BotD detectLanguagesLengthInconsistency: navigator.languages must
    have at least one entry."""
    language_count = _ev(page, "navigator.languages.length")
    assert language_count > 0
@pytest.mark.e2e
def test_botd_plugins_instance_of_PluginArray(page):
    """BotD detectPluginsArray: navigator.plugins must be ``instanceof
    PluginArray``."""
    is_plugin_array = _ev(page, "navigator.plugins instanceof PluginArray")
    assert is_plugin_array
@pytest.mark.e2e
def test_botd_mime_types_consistent_prototype_chain(page):
    """BotD areMimeTypesConsistent: navigator.mimeTypes must have
    MimeTypeArray.prototype as its prototype and every entry must have
    MimeType.prototype."""
    prototype_probe = """() => {
if (typeof navigator.mimeTypes === 'undefined' ||
typeof MimeTypeArray === 'undefined') return false;
let ok = Object.getPrototypeOf(navigator.mimeTypes) === MimeTypeArray.prototype;
for (let i = 0; i < navigator.mimeTypes.length; i++) {
ok = ok && Object.getPrototypeOf(navigator.mimeTypes[i]) === MimeType.prototype;
}
return ok;
}"""
    consistent = _ev(page, prototype_probe)
    assert consistent, "navigator.mimeTypes prototype chain inconsistent"
@pytest.mark.e2e
def test_botd_no_distinctive_window_props(page):
    """BotD checkDistinctiveProperties: none of the listed automation
    globals may be defined on ``window``."""
    DISTINCTIVE = [
        "awesomium", "RunPerfTest", "CefSharp", "fmget_targets", "geb",
        "__nightmare", "nightmare", "__phantomas", "callPhantom", "_phantom",
        "wdioElectron", "__webdriverFunc", "_WEBDRIVER_ELEM_CACHE",
        "ChromeDriverw", "domAutomation", "domAutomationController",
    ]
    leaks = []
    for name in DISTINCTIVE:
        # One evaluate() per name; repr() yields a valid JS string literal.
        if _ev(page, f"typeof window[{name!r}] !== 'undefined'"):
            leaks.append(name)
    assert not leaks, f"Distinctive bot globals leaked: {leaks}"
@pytest.mark.e2e
def test_botd_no_distinctive_document_props(page):
    """BotD: none of the listed webdriver / selenium / cdc property names
    may be defined on ``document``."""
    DOC_LEAKS = [
        "__webdriver_evaluate", "__selenium_evaluate",
        "__webdriver_script_function", "__webdriver_script_func",
        "__webdriver_script_fn", "__fxdriver_evaluate",
        "__driver_unwrapped", "__webdriver_unwrapped",
        "__driver_evaluate", "__selenium_unwrapped",
        "__fxdriver_unwrapped",
        "$cdc_asdjflasutopfhvcZLmcf", "$cdc_asdjflasutopfhvcZLmcfl_",
        "$chrome_asyncScriptInfo", "__$webdriverAsyncExecutor",
    ]
    leaks = []
    for name in DOC_LEAKS:
        if _ev(page, f"typeof document[{name!r}] !== 'undefined'"):
            leaks.append(name)
    assert not leaks, f"document carries automation property names: {leaks}"
@pytest.mark.e2e
def test_botd_document_html_attributes_clean(page):
    """BotD detectDocumentAttributes: no ``name=value`` pair on the <html>
    element may contain 'selenium', 'webdriver' or 'driver' (matched
    case-insensitively); such an attribute yields the Selenium verdict."""
    attrs = _ev(page,
        "Array.from(document.documentElement.attributes).map(a => a.name + '=' + a.value)")
    bot_tokens = ("selenium", "webdriver", "driver")
    bad = []
    for attr in attrs:
        lowered = attr.lower()
        if any(token in lowered for token in bot_tokens):
            bad.append(attr)
    assert not bad, f"HTML attributes contain bot tokens: {bad}"
@pytest.mark.e2e
def test_botd_window_size_nonzero(page):
    """BotD detectWindowSize: window.outerWidth and outerHeight must both
    be positive."""
    ow = _ev(page, "window.outerWidth")
    oh = _ev(page, "window.outerHeight")
    has_width = ow > 0
    assert has_width and oh > 0, (
        f"outerWidth/Height = {ow}/{oh} — headless without window manager"
    )
@pytest.mark.e2e
def test_botd_webgl_debug_renderer_info_available(page):
    """BotD detectWebGL: a WebGL context must exist and expose the
    WEBGL_debug_renderer_info extension."""
    extension_probe = """() => {
const c = document.createElement('canvas');
const gl = c.getContext('webgl') || c.getContext('experimental-webgl');
return !!gl && !!gl.getExtension('WEBGL_debug_renderer_info');
}"""
    has_ext = _ev(page, extension_probe)
    assert has_ext
# ===========================================================================
# sannysoft.com — classic Puppeteer detection harness
# ===========================================================================
@pytest.mark.e2e
def test_sannysoft_chrome_object_consistency(page):
    """With a Firefox UA, ``window.chrome`` must be undefined.

    Skipped (rather than silently passing) when the UA is not Firefox, in
    line with the other Firefox-only tests in this module.
    """
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    assert not _ev(page, "typeof window.chrome !== 'undefined'")
@pytest.mark.e2e
def test_sannysoft_permissions_query_works(page):
    """navigator.permissions.query({name: 'notifications'}) must resolve to
    an object whose ``state`` is a string; a missing API or a thrown error
    counts as failure."""
    permissions_probe = """async () => {
if (!navigator.permissions || !navigator.permissions.query) return false;
try {
const r = await navigator.permissions.query({name: 'notifications'});
return r && typeof r.state === 'string';
} catch (e) { return false; }
}"""
    query_ok = _ev(page, permissions_probe)
    assert query_ok
@pytest.mark.e2e
def test_sannysoft_iframe_chrome_not_leaked(page):
    """With a Firefox UA, ``chrome`` must be undefined on a fresh iframe's
    contentWindow. Skipped for non-Firefox UAs."""
    user_agent = _ev(page, "navigator.userAgent")
    if "Firefox" not in user_agent:
        pytest.skip("Firefox-only invariant")
    iframe_probe = """() => {
const iframe = document.createElement('iframe');
iframe.style.display = 'none';
document.body.appendChild(iframe);
const is = typeof iframe.contentWindow.chrome !== 'undefined';
document.body.removeChild(iframe);
return is;
}"""
    chrome_defined = _ev(page, iframe_probe)
    assert not chrome_defined
@pytest.mark.e2e
def test_sannysoft_iframe_languages_not_empty(page):
    """navigator.languages read inside a fresh hidden iframe must have at
    least one entry."""
    iframe_probe = """() => {
const f = document.createElement('iframe');
f.style.display = 'none';
document.body.appendChild(f);
const len = f.contentWindow.navigator.languages.length;
document.body.removeChild(f);
return len;
}"""
    language_count = _ev(page, iframe_probe)
    assert language_count > 0
# ===========================================================================
# FingerprintJS — fingerprint surface coherence
# ===========================================================================
@pytest.mark.e2e
def test_fpjs_canvas_2d_context_returns_valid(page):
    """A 100x100 canvas must give a 2D context, and toDataURL() after a
    fillText must return a PNG data URL longer than 100 characters."""
    canvas_probe = """() => {
const c = document.createElement('canvas');
c.width = 100; c.height = 100;
const ctx = c.getContext('2d');
if (!ctx) return false;
ctx.fillText('test', 10, 10);
const data = c.toDataURL();
return data.length > 100 && data.startsWith('data:image/png;base64');
}"""
    canvas_ok = _ev(page, canvas_probe)
    assert canvas_ok
@pytest.mark.e2e
def test_fpjs_audio_context_works(page):
    """An OfflineAudioContext must render an oscillator into a non-empty
    buffer; any thrown error counts as failure."""
    render_probe = """async () => {
try {
const ctx = new (window.OfflineAudioContext ||
window.webkitOfflineAudioContext)(1, 5000, 44100);
const osc = ctx.createOscillator();
osc.connect(ctx.destination);
osc.start(0);
const buf = await ctx.startRendering();
return buf && buf.length > 0;
} catch (e) { return false; }
}"""
    rendered = _ev(page, render_probe)
    assert rendered
@pytest.mark.e2e
def test_fpjs_color_gamut_query_works(page):
    """At least one of the srgb / p3 / rec2020 ``color-gamut`` media
    queries must match."""
    gamut_expr = """matchMedia('(color-gamut: srgb)').matches ||
matchMedia('(color-gamut: p3)').matches ||
matchMedia('(color-gamut: rec2020)').matches"""
    any_gamut_matches = _ev(page, gamut_expr)
    assert any_gamut_matches
@pytest.mark.e2e
def test_fpjs_screen_color_depth_realistic(page):
    """screen.colorDepth must be one of 24, 30 or 32."""
    accepted_depths = (24, 30, 32)
    color_depth = _ev(page, "screen.colorDepth")
    assert color_depth in accepted_depths
# ===========================================================================
# PIN-locked surfaces (the values declared in PIN above)
# ===========================================================================
@pytest.mark.e2e
def test_pin_screen_width_lands_in_screen_object(page):
    """screen.width must equal the pinned ``screen.width``."""
    observed = _ev(page, "screen.width")
    assert observed == PIN["screen.width"]
@pytest.mark.e2e
def test_pin_screen_height_lands_in_screen_object(page):
    """screen.height must equal the pinned ``screen.height``."""
    observed = _ev(page, "screen.height")
    assert observed == PIN["screen.height"]
@pytest.mark.e2e
def test_pin_hardware_concurrency_lands_in_navigator(page):
    """navigator.hardwareConcurrency must equal the pinned
    ``hardware.concurrency``."""
    observed = _ev(page, "navigator.hardwareConcurrency")
    assert observed == PIN["hardware.concurrency"]
@pytest.mark.e2e
def test_pin_audio_sample_rate_lands_in_AudioContext(page):
    """A fresh AudioContext must report the pinned ``audio.sample_rate``.

    The context is closed right after the read so the module-shared page
    does not accumulate live AudioContexts from test to test.
    """
    sample_rate = _ev(page, """() => {
        const Ctx = window.AudioContext || window.webkitAudioContext;
        const ctx = new Ctx();
        try {
            return ctx.sampleRate;
        } finally {
            // Fire-and-forget: the value is already read, and a failed
            // close must not turn into a test failure.
            ctx.close().catch(() => {});
        }
    }""")
    assert sample_rate == PIN["audio.sample_rate"]
@pytest.mark.e2e
def test_pin_audio_max_channels_lands_in_destination(page):
    """A fresh AudioContext's ``destination.maxChannelCount`` must equal the
    pinned ``audio.max_channel_count``.

    The context is closed right after the read so the module-shared page
    does not accumulate live AudioContexts from test to test.
    """
    max_channels = _ev(page, """() => {
        const Ctx = window.AudioContext || window.webkitAudioContext;
        const ctx = new Ctx();
        try {
            return ctx.destination.maxChannelCount;
        } finally {
            // Fire-and-forget: the value is already read, and a failed
            // close must not turn into a test failure.
            ctx.close().catch(() => {});
        }
    }""")
    assert max_channels == PIN["audio.max_channel_count"]
# ===========================================================================
# fpscanner-style cross-checks
# ===========================================================================
@pytest.mark.e2e
def test_fpscanner_ua_vs_platform_consistent(page):
    """The OS named in the UA string must also show up in
    navigator.platform. A UA naming none of Windows / Mac / Linux is not
    checked."""
    ua = _ev(page, "navigator.userAgent")
    platform = _ev(page, "navigator.platform")
    names_windows = "Windows" in ua
    if names_windows:
        assert "Win" in platform, f"UA Win but platform={platform!r}"
        return
    if "Mac" in ua:
        assert "Mac" in platform
        return
    if "Linux" in ua:
        assert "Linux" in platform or "X11" in platform
@pytest.mark.e2e
def test_fpscanner_no_userAgentData_on_firefox(page):
    """With a Firefox UA, ``'userAgentData' in navigator`` must be false
    (the property is Chromium-only).

    Skipped (rather than silently passing) when the UA is not Firefox, in
    line with the other Firefox-only tests in this module.
    """
    if "Firefox" not in _ev(page, "navigator.userAgent"):
        pytest.skip("Firefox-only invariant")
    assert not _ev(page, "'userAgentData' in navigator")