invisible_playwright/tests/test_geo.py
feder-cr db1d3ec359 feat(webrtc): auto-derive the WebRTC public IP + suppress IPv6 from the proxy egress
Reuse the single timezone="auto" egress lookup to also make WebRTC coherent:
- prepare_session_geo() returns (timezone, webrtc_ip) from one discovery;
  resolve_session_timezone delegates to it (timezone-only).
- launcher/async _build_env set STEALTHFOX_WEBRTC_PUBLIC_IP to the proxy egress
  (IPv4, when the caller has not pinned it) so the srflx candidate matches the
  proxy, and STEALTHFOX_WEBRTC_DISABLE_IPV6=1 to drop the leaking IPv6 host.
- prefs: baseline sets zoom.stealth.webrtc.disable_ipv6=true; the dead
  media.peerconnection.ice.disableIPv6 pref is removed.
- export prepare_session_geo / SessionGeo.

Needs a binary with the nICEr IPv6 patch to take full effect (env/pref are
no-ops on firefox-7); the public-IP srflx already works on firefox-7.

tests: tests/test_geo.py prepare_session_geo combos; full unit suite 436 green
plus live ICE verification against the patched build.
2026-06-06 06:28:30 +02:00

355 lines
14 KiB
Python

"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution).
Covers: the precedence policy (resolve_session_timezone), proxy→requests
translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked
mmdb). No real network or mmdb is touched.
"""
import sys
import types
import pytest
from invisible_playwright import _geo
from invisible_playwright._geo import (
GeoTimezoneError,
_proxies_for_requests,
_proxy_is_set,
discover_egress_ip,
ip_to_timezone,
prepare_session_geo,
resolve_session_timezone,
)
SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"}
HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"}
# ──────────────────────────────────────────────────────────────────────
# _proxy_is_set
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
@pytest.mark.parametrize(
"proxy,expected",
[
(None, False),
({}, False),
({"server": ""}, False),
({"server": " "}, False),
({"server": "direct://"}, False),
({"server": "DIRECT://"}, False),
({"server": "socks5://h:1"}, True),
({"server": "http://h:8080"}, True),
],
)
def test_proxy_is_set(proxy, expected):
assert _proxy_is_set(proxy) is expected
# ──────────────────────────────────────────────────────────────────────
# _proxies_for_requests — scheme + credential translation
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_proxies_socks5_uses_socks5h_remote_dns():
out = _proxies_for_requests(SOCKS)
assert out["http"] == "socks5h://u:p@gw.example:1080"
assert out["https"] == out["http"]
@pytest.mark.unit
def test_proxies_socks4_scheme():
out = _proxies_for_requests({"server": "socks4://gw:1080"})
assert out["http"] == "socks4://gw:1080"
@pytest.mark.unit
def test_proxies_http_and_https_schemes():
assert _proxies_for_requests(HTTP)["http"] == "http://u:p@gw.example:8080"
out = _proxies_for_requests({"server": "https://gw:8443"})
assert out["https"] == "https://gw:8443"
@pytest.mark.unit
def test_proxies_no_scheme_defaults_to_http():
out = _proxies_for_requests({"server": "gw.example:3128"})
assert out["http"] == "http://gw.example:3128"
@pytest.mark.unit
def test_proxies_credentials_are_url_encoded():
out = _proxies_for_requests(
{"server": "socks5://gw:1080", "username": "user@x", "password": "p:w/d"}
)
# '@', ':' and '/' in creds must be percent-encoded so they don't break
# the proxy URL parsing.
assert "user%40x:p%3Aw%2Fd@gw:1080" in out["http"]
@pytest.mark.unit
def test_proxies_no_credentials_has_no_auth_prefix():
out = _proxies_for_requests({"server": "socks5://gw:1080"})
assert out["http"] == "socks5h://gw:1080"
# ──────────────────────────────────────────────────────────────────────
# discover_egress_ip — mocked requests
# ──────────────────────────────────────────────────────────────────────
class _FakeResp:
def __init__(self, text, status=200):
self.text = text
self._status = status
def raise_for_status(self):
if self._status >= 400:
raise RuntimeError(f"HTTP {self._status}")
@pytest.mark.unit
def test_discover_egress_ip_first_endpoint_wins(monkeypatch):
calls = []
def fake_get(url, **kw):
calls.append(url)
return _FakeResp("203.0.113.7\n")
monkeypatch.setattr(_geo.requests, "get", fake_get)
assert discover_egress_ip(SOCKS) == "203.0.113.7"
assert len(calls) == 1 # stopped at the first success
@pytest.mark.unit
def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch):
seq = iter([_FakeResp("junk-not-an-ip"), _FakeResp("198.51.100.42")])
def fake_get(url, **kw):
return next(seq)
monkeypatch.setattr(_geo.requests, "get", fake_get)
assert discover_egress_ip(HTTP) == "198.51.100.42"
@pytest.mark.unit
def test_discover_egress_ip_all_fail_raises(monkeypatch):
def fake_get(url, **kw):
raise OSError("connection refused")
monkeypatch.setattr(_geo.requests, "get", fake_get)
with pytest.raises(GeoTimezoneError):
discover_egress_ip(SOCKS)
@pytest.mark.unit
def test_discover_egress_ip_no_proxy_is_direct(monkeypatch):
# proxy=None → direct request, requests.get must get proxies=None.
seen = {}
def fake_get(url, **kw):
seen["proxies"] = kw.get("proxies", "MISSING")
return _FakeResp("192.0.2.55")
monkeypatch.setattr(_geo.requests, "get", fake_get)
assert discover_egress_ip(None) == "192.0.2.55"
assert seen["proxies"] is None
# ──────────────────────────────────────────────────────────────────────
# ip_to_timezone — mocked mmdb reader
# ──────────────────────────────────────────────────────────────────────
class _FakeReader:
def __init__(self, record):
self._record = record
def __enter__(self):
return self
def __exit__(self, *a):
return False
def get(self, ip):
return self._record
def _install_fake_maxminddb(monkeypatch, record):
mod = types.ModuleType("maxminddb")
mod.open_database = lambda path: _FakeReader(record)
monkeypatch.setitem(sys.modules, "maxminddb", mod)
@pytest.mark.unit
def test_ip_to_timezone_reads_location_time_zone(monkeypatch):
_install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Europe/Rome"}})
assert ip_to_timezone("1.2.3.4", "x.mmdb") == "Europe/Rome"
@pytest.mark.unit
def test_ip_to_timezone_ip_absent_raises(monkeypatch):
_install_fake_maxminddb(monkeypatch, None)
with pytest.raises(GeoTimezoneError):
ip_to_timezone("1.2.3.4", "x.mmdb")
@pytest.mark.unit
def test_ip_to_timezone_missing_zone_raises(monkeypatch):
_install_fake_maxminddb(monkeypatch, {"location": {}})
with pytest.raises(GeoTimezoneError):
ip_to_timezone("1.2.3.4", "x.mmdb")
@pytest.mark.unit
def test_ip_to_timezone_invalid_iana_raises(monkeypatch):
_install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Not/AZone"}})
with pytest.raises(GeoTimezoneError):
ip_to_timezone("1.2.3.4", "x.mmdb")
# ──────────────────────────────────────────────────────────────────────
# resolve_session_timezone — the precedence policy
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def stub_egress(monkeypatch):
"""Make egress resolution deterministic + offline; record if it ran."""
state = {"called": False}
def fake_discover(proxy=None, **kw):
state["called"] = True
state["proxy_arg"] = proxy
return "203.0.113.7"
monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover)
monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "America/New_York")
# ensure_geoip_mmdb is imported from .download at call time
import invisible_playwright.download as dl
monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb")
return state
@pytest.mark.unit
def test_resolve_explicit_iana_wins(stub_egress):
# An explicit zone wins and never triggers resolution (proxy or not).
assert resolve_session_timezone("Asia/Tokyo", SOCKS) == "Asia/Tokyo"
assert resolve_session_timezone("Asia/Tokyo", None) == "Asia/Tokyo"
assert stub_egress["called"] is False
@pytest.mark.unit
def test_resolve_empty_with_proxy_resolves_from_proxy(stub_egress):
assert resolve_session_timezone("", SOCKS) == "America/New_York"
assert stub_egress["called"] is True
assert stub_egress["proxy_arg"] == SOCKS # routed through the proxy
@pytest.mark.unit
def test_resolve_auto_with_proxy_resolves_from_proxy(stub_egress):
assert resolve_session_timezone("auto", HTTP) == "America/New_York"
assert stub_egress["proxy_arg"] == HTTP
@pytest.mark.unit
def test_resolve_empty_no_proxy_resolves_from_host(stub_egress):
# auto ALWAYS resolves — without a proxy, from the host's own public IP.
assert resolve_session_timezone("", None) == "America/New_York"
assert stub_egress["called"] is True
assert stub_egress["proxy_arg"] is None # direct request, no proxy
@pytest.mark.unit
def test_resolve_auto_no_proxy_resolves_from_host(stub_egress):
assert resolve_session_timezone("auto", None) == "America/New_York"
assert stub_egress["proxy_arg"] is None
@pytest.mark.unit
def test_resolve_direct_proxy_resolves_via_host(stub_egress):
# direct:// counts as "no proxy" → resolve from the host IP, don't skip.
assert resolve_session_timezone("auto", {"server": "direct://"}) == "America/New_York"
assert stub_egress["proxy_arg"] is None
@pytest.mark.unit
def test_resolve_no_proxy_failure_falls_back_to_host(monkeypatch):
# Without a proxy, a lookup failure must NOT break the launch → host TZ ("").
def boom(proxy=None, **kw):
raise GeoTimezoneError("offline")
monkeypatch.setattr(_geo, "discover_egress_ip", boom)
assert resolve_session_timezone("auto", None) == ""
assert resolve_session_timezone("", None) == ""
@pytest.mark.unit
def test_resolve_proxy_failure_raises(monkeypatch):
# With a proxy set, a failure must raise — never a silent host-TZ fallback.
def boom(proxy=None, **kw):
raise GeoTimezoneError("no egress")
monkeypatch.setattr(_geo, "discover_egress_ip", boom)
with pytest.raises(GeoTimezoneError):
resolve_session_timezone("auto", SOCKS)
with pytest.raises(GeoTimezoneError):
resolve_session_timezone("", SOCKS)
# ──────────────────────────────────────────────────────────────────────
# prepare_session_geo — timezone + WebRTC IP from ONE egress lookup
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_geo_auto_proxy_sets_tz_and_webrtc(stub_egress):
g = prepare_session_geo("", SOCKS, want_webrtc=True)
assert g.timezone == "America/New_York"
assert g.webrtc_ip == "203.0.113.7" # proxy egress (IPv4) → synthetic srflx
@pytest.mark.unit
def test_geo_explicit_proxy_keeps_tz_but_still_sets_webrtc(stub_egress):
# an explicit zone doesn't resolve tz, but WebRTC still gets the proxy IP.
g = prepare_session_geo("Asia/Tokyo", SOCKS, want_webrtc=True)
assert g.timezone == "Asia/Tokyo"
assert g.webrtc_ip == "203.0.113.7"
@pytest.mark.unit
def test_geo_want_webrtc_false_skips_webrtc(stub_egress):
# caller already pinned the env var → don't auto-derive.
g = prepare_session_geo("auto", SOCKS, want_webrtc=False)
assert g.timezone == "America/New_York"
assert g.webrtc_ip is None
@pytest.mark.unit
def test_geo_no_proxy_no_webrtc(stub_egress):
g = prepare_session_geo("auto", None, want_webrtc=True)
assert g.timezone == "America/New_York" # resolved from host IP
assert g.webrtc_ip is None # no proxy → no synthetic srflx needed
@pytest.mark.unit
def test_geo_ipv6_egress_no_webrtc(monkeypatch):
monkeypatch.setattr(_geo, "discover_egress_ip", lambda *a, **k: "2001:db8::1")
monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "Europe/Berlin")
import invisible_playwright.download as dl
monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb")
g = prepare_session_geo("auto", SOCKS, want_webrtc=True)
assert g.timezone == "Europe/Berlin"
assert g.webrtc_ip is None # IPv6 not injected as a srflx candidate
@pytest.mark.unit
def test_geo_explicit_proxy_discovery_fail_is_best_effort(monkeypatch):
# explicit tz + proxy + discovery fails → keep tz, webrtc None, NO raise.
def boom(*a, **k):
raise GeoTimezoneError("down")
monkeypatch.setattr(_geo, "discover_egress_ip", boom)
g = prepare_session_geo("Asia/Tokyo", SOCKS, want_webrtc=True)
assert g.timezone == "Asia/Tokyo"
assert g.webrtc_ip is None
@pytest.mark.unit
def test_geo_auto_proxy_discovery_fail_raises(monkeypatch):
def boom(*a, **k):
raise GeoTimezoneError("down")
monkeypatch.setattr(_geo, "discover_egress_ip", boom)
with pytest.raises(GeoTimezoneError):
prepare_session_geo("auto", SOCKS, want_webrtc=True)