From 8bf72da40c67c8b6644a89c2aab9f2d61b79df3f Mon Sep 17 00:00:00 2001 From: feder-cr <85809106+feder-cr@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:35:52 +0200 Subject: [PATCH] test(webrtc): realness sentinels + e2e behind a fake TCP-only SOCKS proxy Unit sentinels (run in CI) assert the rules a real WebRTC profile must meet: host candidate is mDNS .local (never a raw LAN IP), the synthetic srflx carries the egress IP with a genuine nICEr priority (rejecting the old local_pref 0xFFFF) and a foundation distinct from the host one, and CreepJS's resolver returns the egress (and reads a host-only SDP as blocked). e2e tests launch the binary and check the live gather. "Behind a proxy" is reproduced without any external proxy: an in-process SOCKS5 server relays TCP CONNECT but refuses UDP ASSOCIATE (a TCP-only residential proxy), and the egress IP is injected as an RFC 5737 TEST-NET address. test_not_blocked_behind_tcp_only_socks guards the gather-fails-behind-proxy bug. webrtc-e2e.yml runs the e2e on demand (needs a binary that carries the fixes). --- .github/workflows/webrtc-e2e.yml | 47 ++++ tests/test_webrtc_realness.py | 442 +++++++++++++++++++++++++++++++ 2 files changed, 489 insertions(+) create mode 100644 .github/workflows/webrtc-e2e.yml create mode 100644 tests/test_webrtc_realness.py diff --git a/.github/workflows/webrtc-e2e.yml b/.github/workflows/webrtc-e2e.yml new file mode 100644 index 0000000..d14b8ce --- /dev/null +++ b/.github/workflows/webrtc-e2e.yml @@ -0,0 +1,47 @@ +name: webrtc-e2e + +# Live WebRTC realness check against the shipped patched binary. +# +# Manual (workflow_dispatch) on purpose: it needs a firefox-N binary that +# carries the WebRTC fixes (synthetic srflx in genuine nICEr form + the +# default-route fallback behind a proxy). Run it after publishing such a +# binary — it is the release gate for "WebRTC looks real behind a proxy". +# Until that binary ships, test_not_blocked_behind_tcp_only_socks is EXPECTED +# to fail (the old binary is fully blocked behind a SOCKS proxy), which is the +# whole point of the gate. +# +# No smartproxy / credentials: the "behind a proxy" condition is faked by an +# in-process TCP-only SOCKS5 server (refuses UDP ASSOCIATE) and the egress IP +# is injected as an RFC 5737 TEST-NET address. Fully self-contained. + +on: + workflow_dispatch: + +jobs: + webrtc-e2e: + name: webrtc realness (ubuntu, py3.12) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + + - name: Install package + dev extras + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Fetch the patched Firefox binary + run: python -m invisible_playwright fetch + + - name: Resolve binary path + run: echo "STEALTHFOX_E2E_BINARY=$(python -m invisible_playwright path)" >> "$GITHUB_ENV" + + - name: Run WebRTC realness e2e (xvfb for the headless Firefox) + run: | + sudo apt-get update && sudo apt-get install -y xvfb + xvfb-run -a pytest tests/test_webrtc_realness.py -m e2e -o addopts="" -v -rs diff --git a/tests/test_webrtc_realness.py b/tests/test_webrtc_realness.py new file mode 100644 index 0000000..fec01c0 --- /dev/null +++ b/tests/test_webrtc_realness.py @@ -0,0 +1,442 @@ +"""WebRTC realness regression tests. + +Two layers, both runnable on GitHub CI: + +* **unit** (`@pytest.mark.unit`) — pure SDP/candidate assertions against golden + samples. No browser, no proxy, no network. These lock in every rule we found + on 2026-06-06: host must be mDNS ``.local``; the synthetic srflx must carry the + egress IP with a GENUINE nICEr priority (never ``local_pref == 0xFFFF``) and a + stable, distinct foundation; CreepJS's resolver must return the egress, and a + host-only SDP must read as "blocked". They run in the standard ``tests.yml``. + +* **e2e** (`@pytest.mark.e2e`) — launch the patched binary and verify the live + ICE gather. "Being behind a proxy" is faked WITHOUT smartproxy: + - the egress IP is injected via ``STEALTHFOX_WEBRTC_PUBLIC_IP`` (RFC 5737 + TEST-NET, so it never collides with a real IP); + - the "behind a TCP-only SOCKS proxy" condition is reproduced by a tiny + in-process SOCKS5 server that relays TCP CONNECT but refuses UDP ASSOCIATE + (exactly a residential TCP-only proxy → WebRTC's default-route UDP probe + fails → exercises the Fix C fallback). No credentials, no external proxy. + Excluded from the default run; a binary is located via ``STEALTHFOX_E2E_BINARY`` + (or the locally-built tree), else the test skips. +""" +from __future__ import annotations + +import os +import re +import select +import socket +import struct +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + +import pytest + +# ────────────────────────────────────────────────────────────────────────── +# Pure SDP / ICE-candidate helpers (no I/O) — the heart of the sentinels. +# ────────────────────────────────────────────────────────────────────────── +_CAND = re.compile( + r"candidate:(?P\S+)\s+(?P\d+)\s+(?PUDP|TCP|udp|tcp)\s+" + r"(?P\d+)\s+(?P
\S+)\s+(?P\d+)\s+typ\s+(?P\w+)" + r"(?:.*?raddr\s+(?P\S+)\s+rport\s+(?P\d+))?" +) + + +def parse_candidate(line): + """Parse one ``a=candidate:`` / ``candidate:`` line into a dict (or None).""" + m = _CAND.search(line) + if not m: + return None + d = m.groupdict() + d["component"] = int(d["component"]) + d["priority"] = int(d["priority"]) + d["port"] = int(d["port"]) + d["proto"] = d["proto"].upper() + if d["rport"] is not None: + d["rport"] = int(d["rport"]) + return d + + +def decode_priority(prio): + """Split a candidate priority into nICEr's fields (RFC 5245 layout that + nICEr emits: type<<24 | iface<<16 | dir<<13 | stun<<8 | (256-component)).""" + return { + "type_pref": (prio >> 24) & 0xFF, + "iface_pref": (prio >> 16) & 0xFF, + "local_pref": (prio >> 8) & 0xFFFF, + "direction": (prio >> 13) & 0x7, + "stun_priority": (prio >> 8) & 0x1F, + "component": 256 - (prio & 0xFF), + } + + +def is_mdns(addr): + return bool(addr) and str(addr).endswith(".local") + + +def candidates(sdp_or_lines): + if isinstance(sdp_or_lines, str): + lines = re.findall(r"(?:a=)?candidate:[^\r\n]*", sdp_or_lines) + else: + lines = list(sdp_or_lines) + return [c for c in (parse_candidate(l) for l in lines) if c] + + +def host_candidates(cands): + return [c for c in cands if c["typ"] == "host"] + + +def srflx_candidates(cands): + return [c for c in cands if c["typ"] == "srflx"] + + +def host_is_mdns(cands): + """Every host candidate must be a ``.local`` mDNS name, never a raw + LAN IP (the §9.4 leak form that fails BrowserLeaks).""" + hosts = host_candidates(cands) + return bool(hosts) and all(is_mdns(c["address"]) for c in hosts) + + +def srflx_realness(cand, expected_ip=None): + """Return (ok, reasons) for whether ``cand`` looks like a GENUINE nICEr UDP + server-reflexive candidate. Encodes the 2026-06-06 findings.""" + reasons = [] + if cand["typ"] != "srflx": + reasons.append("not a srflx candidate") + return False, reasons + if expected_ip is not None and cand["address"] != expected_ip: + reasons.append(f"address {cand['address']} != expected {expected_ip}") + p = decode_priority(cand["priority"]) + if p["type_pref"] != 100: + reasons.append(f"type_pref {p['type_pref']} != 100 (SRV_RFLX)") + if p["local_pref"] == 0xFFFF: + reasons.append("local_pref == 0xFFFF — impossible nICEr value (the old hardcoded tell)") + elif not (0x7000 <= p["local_pref"] < 0x8000): + reasons.append(f"local_pref {p['local_pref']} outside the genuine ~0x7E00-0x7FFF band") + if not (16 <= p["stun_priority"] <= 31): + reasons.append(f"stun_priority {p['stun_priority']} implausible (expect 31-server_id)") + if cand.get("raddr") not in (None, "0.0.0.0"): + reasons.append(f"raddr {cand['raddr']} not redacted to 0.0.0.0") + return (not reasons), reasons + + +def creep_get_ipaddress(sdp): + """Faithful port of CreepJS's getIPAddress(sdp): connection line first, then + the first candidate IP; '0.0.0.0' counts as blocked. Returns None if blocked + — i.e. exactly what makes CreepJS render 'stun connection: blocked'.""" + blocked = "0.0.0.0" + conn = (re.findall(r"c=IN\s.+\s", sdp) or [""])[0].strip().split(" ") + conn_ip = conn[2] if len(conn) > 2 else "" + if conn_ip and conn_ip != blocked: + return conn_ip + m = re.search(r"(udp|tcp)\s(?:\d|\w)+\s((?:\d|\w|\.|:)+)(?=\s)", sdp, re.I) + ip = m.group(2) if m else None + return ip if (ip and ip != blocked) else None + + +# ────────────────────────────────────────────────────────────────────────── +# Golden samples — real priority/foundation values, TEST-NET IPs (RFC 5737) +# so no real address is ever committed (feedback_pre_push_privacy_check). +# ────────────────────────────────────────────────────────────────────────── +HOST_MDNS = "candidate:0 1 UDP 2122252543 1460e928-16b3-4c66-80ad-04abcdef0000.local 54551 typ host" +HOST_RAW_IP = "candidate:0 1 UDP 2122252543 192.168.1.20 54551 typ host" # §9.4 leak form +VANILLA_SRFLX = "candidate:1 1 UDP 1685987327 203.0.113.50 3755 typ srflx raddr 0.0.0.0 rport 0" +OURS_SRFLX = "candidate:1 1 UDP 1686052863 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0" +# Pre-fix injection: local_pref hardcoded to 0xFFFF (priority 1694498815). The tell. +OLD_BAD_SRFLX = "candidate:2 1 UDP 1694498815 203.0.113.7 58555 typ srflx raddr 0.0.0.0 rport 0" + +SDP_GOOD = ( + "v=0\r\nc=IN IP4 0.0.0.0\r\n" + f"a={HOST_MDNS}\r\na={OURS_SRFLX}\r\n" +) +SDP_BLOCKED = "v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n" # host-only, no srflx + + +# ────────────────────────────────────────────────────────────────────────── +# UNIT sentinels (run on GitHub CI) +# ────────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_parse_and_decode_basics(): + c = parse_candidate(OURS_SRFLX) + assert c["typ"] == "srflx" and c["proto"] == "UDP" + assert c["address"] == "203.0.113.7" and c["raddr"] == "0.0.0.0" and c["rport"] == 0 + p = decode_priority(c["priority"]) + assert p["type_pref"] == 100 and p["stun_priority"] == 31 and p["component"] == 1 + + +@pytest.mark.unit +def test_genuine_srflx_passes(): + for line in (VANILLA_SRFLX, OURS_SRFLX): + ok, reasons = srflx_realness(parse_candidate(line), expected_ip=parse_candidate(line)["address"]) + assert ok, reasons + + +@pytest.mark.unit +def test_old_0xffff_srflx_is_rejected(): + """Fix A sentinel: local_pref == 0xFFFF must be flagged as fake.""" + ok, reasons = srflx_realness(parse_candidate(OLD_BAD_SRFLX)) + assert not ok + assert any("0xFFFF" in r for r in reasons), reasons + + +@pytest.mark.unit +def test_host_must_be_mdns_not_raw_ip(): + """§9.4 sentinel: raw-IP host candidate is a leak; .local is required.""" + assert host_is_mdns(candidates([HOST_MDNS])) is True + assert host_is_mdns(candidates([HOST_RAW_IP])) is False + + +@pytest.mark.unit +def test_srflx_foundation_distinct_from_host(): + """Fix B sentinel: srflx foundation must differ from the host foundations.""" + cands = candidates([HOST_MDNS, OURS_SRFLX]) + host_fnds = {c["foundation"] for c in host_candidates(cands)} + srflx_fnds = {c["foundation"] for c in srflx_candidates(cands)} + assert srflx_fnds and srflx_fnds.isdisjoint(host_fnds) + + +@pytest.mark.unit +def test_creep_resolver_returns_egress_when_srflx_present(): + assert creep_get_ipaddress(SDP_GOOD) == "203.0.113.7" + + +@pytest.mark.unit +def test_creep_resolver_reports_blocked_for_host_only(): + """The exact false-green we shipped: host-only (.local) SDP → no public IP + → CreepJS shows 'blocked'. The resolver must return None here.""" + assert creep_get_ipaddress(SDP_BLOCKED) is None + + +@pytest.mark.unit +def test_mdns_host_is_invisible_to_creep_resolver(): + """A .local host must NOT be mis-read as an IP (the hyphen in the UUID is + what makes CreepJS skip it and fall through to the srflx).""" + assert creep_get_ipaddress("v=0\r\nc=IN IP4 0.0.0.0\r\n" f"a={HOST_MDNS}\r\n") is None + + +# ────────────────────────────────────────────────────────────────────────── +# Fake-proxy infrastructure for e2e: a tiny TCP-only SOCKS5 server. +# ────────────────────────────────────────────────────────────────────────── +class _Socks5TcpOnly: + """Minimal SOCKS5: no-auth, CONNECT (TCP) relayed, UDP ASSOCIATE refused. + + Reproduces a residential TCP-only proxy: pages load over TCP, but WebRTC's + UDP path is dead — which (for a no-camera page in default_address_only mode) + is exactly what made the default-route probe fail and ICE return zero + candidates before Fix C. + """ + + def __init__(self): + self._srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self._srv.bind(("127.0.0.1", 0)) + self._srv.listen(16) + self.port = self._srv.getsockname()[1] + self.udp_associate_attempts = 0 + self._stop = False + self._t = threading.Thread(target=self._serve, daemon=True) + self._t.start() + + def _serve(self): + while not self._stop: + try: + conn, _ = self._srv.accept() + except OSError: + break + threading.Thread(target=self._handle, args=(conn,), daemon=True).start() + + def _recv_exact(self, sock, n): + buf = b"" + while len(buf) < n: + chunk = sock.recv(n - len(buf)) + if not chunk: + return None + buf += chunk + return buf + + def _handle(self, conn): + try: + head = self._recv_exact(conn, 2) + if not head or head[0] != 0x05: + conn.close() + return + nmethods = head[1] + self._recv_exact(conn, nmethods) + conn.sendall(b"\x05\x00") # no-auth + req = self._recv_exact(conn, 4) + if not req: + conn.close() + return + ver, cmd, _, atyp = req + if atyp == 0x01: + addr = socket.inet_ntoa(self._recv_exact(conn, 4)) + elif atyp == 0x03: + ln = self._recv_exact(conn, 1)[0] + addr = self._recv_exact(conn, ln).decode("ascii", "ignore") + elif atyp == 0x04: + addr = socket.inet_ntop(socket.AF_INET6, self._recv_exact(conn, 16)) + else: + conn.close() + return + port = struct.unpack("!H", self._recv_exact(conn, 2))[0] + if cmd != 0x01: # not CONNECT (e.g. UDP ASSOCIATE) → refuse + self.udp_associate_attempts += 1 + conn.sendall(b"\x05\x07\x00\x01\x00\x00\x00\x00\x00\x00") # cmd not supported + conn.close() + return + try: + upstream = socket.create_connection((addr, port), timeout=15) + except OSError: + conn.sendall(b"\x05\x04\x00\x01\x00\x00\x00\x00\x00\x00") # host unreachable + conn.close() + return + conn.sendall(b"\x05\x00\x00\x01\x00\x00\x00\x00\x00\x00") # success + self._relay(conn, upstream) + except Exception: + try: + conn.close() + except Exception: + pass + + def _relay(self, a, b): + try: + while True: + r, _, _ = select.select([a, b], [], [], 30) + if not r: + break + for s in r: + data = s.recv(65536) + if not data: + return + (b if s is a else a).sendall(data) + finally: + for s in (a, b): + try: + s.close() + except Exception: + pass + + def close(self): + self._stop = True + try: + self._srv.close() + except Exception: + pass + + +# Same per-event probe CreepJS runs (kept tiny; raw string = one escape level). +_PROBE_JS = r"""async () => { + const pc = new RTCPeerConnection({iceCandidatePoolSize:1, iceServers:[{urls:[ + 'stun:stun4.l.google.com:19302','stun:stun3.l.google.com:19302']}]}); + pc.createDataChannel(''); + const cands = []; + pc.addEventListener('icecandidate', e => { if (e.candidate && e.candidate.candidate) cands.push(e.candidate.candidate); }); + await pc.setLocalDescription(await pc.createOffer({offerToReceiveAudio:1, offerToReceiveVideo:1})); + await new Promise(r => setTimeout(r, 3500)); + const sdp = (pc.localDescription && pc.localDescription.sdp) || ''; + try { pc.close(); } catch(e) {} + return { candidates: cands, sdp }; +}""" + +_FAKE_EGRESS = "203.0.113.7" # RFC 5737 TEST-NET-3 + + +def _e2e_binary(): + cand = os.environ.get("STEALTHFOX_E2E_BINARY") + if cand and os.path.exists(cand): + return cand + built = r"C:\ff\source\obj-x86_64-pc-windows-msvc\dist\bin\firefox.exe" + if os.path.exists(built): + return built + return None + + +@pytest.fixture +def socks5_tcp_only(): + srv = _Socks5TcpOnly() + yield srv + srv.close() + + +@pytest.fixture +def local_https_page(): + """A trivial localhost page (used by the no-proxy srflx test).""" + class H(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header("Content-Type", "text/html") + self.end_headers() + self.wfile.write(b"wrtc") + + def log_message(self, *a): + pass + + httpd = HTTPServer(("127.0.0.1", 0), H) + threading.Thread(target=httpd.serve_forever, daemon=True).start() + yield f"http://127.0.0.1:{httpd.server_address[1]}/" + httpd.shutdown() + + +def _launch(**extra): + from invisible_playwright import InvisiblePlaywright + + kw = {"headless": True, + # Fixed zone so the wrapper does NOT run timezone="auto" egress + # discovery through the (fake) proxy — irrelevant here, we inject the + # egress IP directly and want the launch deterministic/offline. + "timezone": "America/New_York", + "extra_prefs": {"media.peerconnection.ice.obfuscate_host_addresses": True}} + kw.update(extra) + return InvisiblePlaywright(**kw) + + +@pytest.mark.e2e +def test_srflx_is_real_and_resolvable(local_https_page): + """No proxy needed: the egress is faked via the env. Asserts the live srflx + is genuine (Fix A/B) and that CreepJS's resolver returns it (not blocked).""" + binary = _e2e_binary() + if not binary: + pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)") + os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _FAKE_EGRESS + os.environ["STEALTHFOX_WEBRTC_DISABLE_IPV6"] = "1" + with _launch(binary_path=binary) as browser: + page = browser.new_context().new_page() + page.goto(local_https_page, wait_until="domcontentloaded", timeout=60000) + res = page.evaluate(_PROBE_JS) + cands = candidates(res["candidates"]) + assert cands, "ICE produced ZERO candidates (blocked)" + assert host_is_mdns(cands), [c["address"] for c in host_candidates(cands)] + srflx = [c for c in srflx_candidates(cands) if c["address"] == _FAKE_EGRESS] + assert srflx, f"no synthetic srflx with {_FAKE_EGRESS}: {res['candidates']}" + ok, reasons = srflx_realness(srflx[0], expected_ip=_FAKE_EGRESS) + assert ok, reasons + # Two srflx for the same base must share ONE stable foundation (Fix B). + assert len({c["foundation"] for c in srflx}) == 1 + assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS + + +@pytest.mark.e2e +def test_not_blocked_behind_tcp_only_socks(socks5_tcp_only): + """Fix C sentinel: behind a TCP-only SOCKS proxy on a remote origin, ICE + must still complete (host .local + synthetic srflx), not return zero + candidates. Without Fix C this page is fully 'blocked'.""" + binary = _e2e_binary() + if not binary: + pytest.skip("no patched binary (set STEALTHFOX_E2E_BINARY)") + os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _FAKE_EGRESS + os.environ["STEALTHFOX_WEBRTC_DISABLE_IPV6"] = "1" + proxy = {"server": f"socks5://127.0.0.1:{socks5_tcp_only.port}"} + try: + with _launch(binary_path=binary, proxy=proxy) as browser: + page = browser.new_context().new_page() + # remote origin loaded THROUGH the local SOCKS proxy (not localhost, + # so no proxy-bypass) → WebRTC proxy config active → Fix C path. + page.goto("https://example.com/", wait_until="domcontentloaded", timeout=70000) + res = page.evaluate(_PROBE_JS) + except Exception as exc: # network/proxy unavailable in this environment + pytest.skip(f"proxy/network path unavailable: {exc!r}") + cands = candidates(res["candidates"]) + assert cands, "behind SOCKS the gather returned ZERO candidates — Fix C regressed (blocked)" + assert host_is_mdns(cands) + assert any(c["address"] == _FAKE_EGRESS for c in srflx_candidates(cands)), res["candidates"] + assert creep_get_ipaddress(res["sdp"]) == _FAKE_EGRESS