diff --git a/src/invisible_playwright/__init__.py b/src/invisible_playwright/__init__.py index 0871021..5963883 100644 --- a/src/invisible_playwright/__init__.py +++ b/src/invisible_playwright/__init__.py @@ -17,7 +17,12 @@ Quickstart: """ from .config import get_default_args, get_default_stealth_prefs from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION -from ._geo import GeoTimezoneError, resolve_session_timezone +from ._geo import ( + GeoTimezoneError, + SessionGeo, + prepare_session_geo, + resolve_session_timezone, +) from .download import ensure_binary, ensure_geoip_mmdb from .launcher import InvisiblePlaywright @@ -37,6 +42,8 @@ __all__ = [ "get_default_stealth_prefs", "get_default_args", "resolve_session_timezone", + "prepare_session_geo", + "SessionGeo", "GeoTimezoneError", "BINARY_VERSION", "FIREFOX_UPSTREAM_VERSION", diff --git a/src/invisible_playwright/_geo.py b/src/invisible_playwright/_geo.py index 02971e1..db3eefa 100644 --- a/src/invisible_playwright/_geo.py +++ b/src/invisible_playwright/_geo.py @@ -18,11 +18,16 @@ On failure: we fail loudly rather than fall back silently. without a proxy → "" (host) the host TZ is a safe default, so a transient lookup failure must not break the launch. + +``prepare_session_geo`` reuses the SAME single egress lookup to also return the +proxy egress IP for the WebRTC synthetic srflx candidate (consumed by the C++ +``nr_stealth_bridge`` via ``STEALTHFOX_WEBRTC_PUBLIC_IP``), so the WebRTC public +IP matches the proxy instead of leaking the host IP. """ from __future__ import annotations import ipaddress -from typing import Any, Dict, Optional +from typing import Any, Dict, NamedTuple, Optional from urllib.parse import quote import requests @@ -136,6 +141,70 @@ def ip_to_timezone(ip: str, mmdb_path: Any) -> str: return tz +class SessionGeo(NamedTuple): + """Egress-derived session signals.""" + + timezone: str # resolved IANA zone, or "" = host TZ + webrtc_ip: Optional[str] # proxy egress IPv4 for the synthetic srflx, or None + + +def prepare_session_geo( + timezone: str, + proxy: Optional[Dict[str, str]], + *, + want_webrtc: bool = True, +) -> SessionGeo: + """Resolve the timezone AND the WebRTC public IP from ONE egress lookup. + + Timezone precedence is the same as :func:`resolve_session_timezone`. When a + proxy is set the proxy egress IP is discovered once and reused for both the + timezone and (if ``want_webrtc`` and the IP is IPv4) the WebRTC srflx + candidate, so the WebRTC public IP matches the proxy instead of leaking the + host IP. Pass ``want_webrtc=False`` if the caller already pinned + ``STEALTHFOX_WEBRTC_PUBLIC_IP``. + """ + from .download import ensure_geoip_mmdb + + tz_in = (timezone or "").strip() + explicit = bool(tz_in) and tz_in.lower() != "auto" + proxy_set = _proxy_is_set(proxy) + + # One egress discovery when a proxy is set (feeds tz-auto and/or webrtc). + egress_ip: Optional[str] = None + discover_err: Optional[Exception] = None + if proxy_set and (not explicit or want_webrtc): + try: + egress_ip = discover_egress_ip(proxy) + except Exception as exc: # noqa: BLE001 + discover_err = exc + + # ── timezone ── + if explicit: + tz = tz_in + elif proxy_set: + if egress_ip is None: + # auto behind a proxy must resolve — fail-early (timezone_mismatch). + raise discover_err or GeoTimezoneError("could not resolve egress timezone") + tz = ip_to_timezone(egress_ip, ensure_geoip_mmdb()) + else: + # no proxy → host public IP; a transient failure falls back to host TZ. + try: + tz = ip_to_timezone(discover_egress_ip(None), ensure_geoip_mmdb()) + except Exception: # noqa: BLE001 + tz = "" + + # ── webrtc ── only with a proxy, a successful discovery, and an IPv4 egress + webrtc_ip: Optional[str] = None + if proxy_set and want_webrtc and egress_ip: + try: + if ipaddress.ip_address(egress_ip).version == 4: + webrtc_ip = egress_ip + except ValueError: + pass + + return SessionGeo(timezone=tz, webrtc_ip=webrtc_ip) + + def resolve_session_timezone( timezone: str, proxy: Optional[Dict[str, str]] ) -> str: @@ -146,19 +215,7 @@ def resolve_session_timezone( host's own public IP). On failure: with a proxy we raise :class:`GeoTimezoneError` (never silently use the host TZ behind a foreign proxy); without a proxy we fall back to ``""`` (host TZ) so a transient - lookup failure can't break the launch. + lookup failure can't break the launch. WebRTC is left to the caller — use + :func:`prepare_session_geo` to resolve both from a single lookup. """ - tz = (timezone or "").strip() - if tz and tz.lower() != "auto": - return tz # explicit IANA wins - # "" or "auto" → always resolve from the egress IP. - from .download import ensure_geoip_mmdb - - proxy_set = _proxy_is_set(proxy) - try: - ip = discover_egress_ip(proxy if proxy_set else None) - return ip_to_timezone(ip, ensure_geoip_mmdb()) - except Exception: - if proxy_set: - raise # fail-early behind a proxy (timezone_mismatch trap) - return "" # no proxy: host TZ is a safe fallback + return prepare_session_geo(timezone, proxy, want_webrtc=False).timezone diff --git a/src/invisible_playwright/async_api.py b/src/invisible_playwright/async_api.py index 70a7aeb..84d3986 100644 --- a/src/invisible_playwright/async_api.py +++ b/src/invisible_playwright/async_api.py @@ -9,7 +9,7 @@ from typing import Any, Dict, Optional, Union from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright from ._fpforge import Profile, generate_profile -from ._geo import resolve_session_timezone +from ._geo import prepare_session_geo from ._headless import make_virtual_display from ._proxy import configure_proxy as _configure_proxy_shared from .download import ensure_binary @@ -63,6 +63,7 @@ class InvisiblePlaywright: self._humanize = humanize self._locale = locale self._timezone = timezone + self._webrtc_ip: Optional[str] = None # auto-set from proxy egress at launch self._extra_prefs = extra_prefs self._binary_path = binary_path self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None @@ -75,14 +76,18 @@ class InvisiblePlaywright: self._virtual_display: Any = None async def __aenter__(self) -> Union[Browser, BrowserContext]: + import os as _os import sys as _sys - # Resolve timezone="auto" (and the proxy-set-but-unset default) to a - # concrete IANA zone before anything reads self._timezone. Run the - # blocking geo lookup off the event loop. Fail-early if a proxy is set - # but the egress zone can't be resolved. - self._timezone = await asyncio.to_thread( - resolve_session_timezone, self._timezone, self._proxy + # Resolve timezone="auto" AND the WebRTC public IP from one egress lookup + # before anything reads them. Run the blocking geo lookup off the event + # loop. Fail-early if a proxy is set but the egress zone can't resolve; + # don't auto-set the WebRTC IP if the caller already pinned the env var. + want_webrtc = not _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP") + geo = await asyncio.to_thread( + prepare_session_geo, self._timezone, self._proxy, want_webrtc=want_webrtc, ) + self._timezone = geo.timezone + self._webrtc_ip = geo.webrtc_ip executable = self._binary_path or ensure_binary() prefs = translate_profile_to_prefs( self._profile, @@ -203,12 +208,18 @@ class InvisiblePlaywright: env = _os.environ.copy() if self._timezone: env["TZ"] = _tz_env(self._timezone) - # Propagate STEALTHFOX_WEBRTC_PUBLIC_IP if the process set it — read - # by nICEr's nr_stealth_bridge to inject a synthetic srflx candidate - # matching the proxy egress IP. This avoids the StaticPref IPC - # propagation timing issue between parent and socket processes. - if _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP"): - env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] + # WebRTC public IP for nICEr's nr_stealth_bridge synthetic srflx (so + # WebRTC reports the proxy egress, not the host IP). Prefer the IP + # auto-derived from the proxy egress at launch; otherwise honour a + # caller-set env var (which always wins, since auto-derivation is + # skipped when it's present). + webrtc_ip = self._webrtc_ip or _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP") + if webrtc_ip: + env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = webrtc_ip + # Drop IPv6 host candidates (upstream disableIPv6 is dead in FF150). The + # env var is read directly in the socket process by our nICEr patch. + # Always on; a caller can pre-set it to "0" to opt out. + env.setdefault("STEALTHFOX_WEBRTC_DISABLE_IPV6", "1") return env def _resolve_headless(self) -> bool: diff --git a/src/invisible_playwright/launcher.py b/src/invisible_playwright/launcher.py index 15055ee..9d81d25 100644 --- a/src/invisible_playwright/launcher.py +++ b/src/invisible_playwright/launcher.py @@ -8,7 +8,7 @@ from typing import Any, Dict, Optional, Union from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright from ._fpforge import Profile, generate_profile -from ._geo import resolve_session_timezone +from ._geo import prepare_session_geo from ._headless import make_virtual_display from ._proxy import configure_proxy as _configure_proxy_shared from .download import ensure_binary @@ -171,6 +171,7 @@ class InvisiblePlaywright: self._humanize = humanize self._locale = locale self._timezone = timezone + self._webrtc_ip: Optional[str] = None # auto-set from proxy egress at launch self._extra_prefs = extra_prefs self._binary_path = binary_path self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None @@ -185,10 +186,17 @@ class InvisiblePlaywright: self._virtual_display: Any = None def __enter__(self) -> Union[Browser, BrowserContext]: - # Resolve timezone="auto" (and the proxy-set-but-unset default) to a - # concrete IANA zone before anything reads self._timezone. Fail-early - # if a proxy is set but the egress zone can't be resolved. - self._timezone = resolve_session_timezone(self._timezone, self._proxy) + # Resolve timezone="auto" AND the WebRTC public IP from one egress + # lookup before anything reads them. Fail-early if a proxy is set but + # the egress zone can't be resolved. Don't auto-set the WebRTC IP if the + # caller already pinned STEALTHFOX_WEBRTC_PUBLIC_IP. + import os as _os + geo = prepare_session_geo( + self._timezone, self._proxy, + want_webrtc=not _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP"), + ) + self._timezone = geo.timezone + self._webrtc_ip = geo.webrtc_ip executable = self._binary_path or ensure_binary() prefs = self._build_prefs() playwright_proxy = _configure_proxy_shared(self._proxy, prefs) @@ -354,12 +362,18 @@ class InvisiblePlaywright: env = _os.environ.copy() if self._timezone: env["TZ"] = _tz_env(self._timezone) - # Propagate STEALTHFOX_WEBRTC_PUBLIC_IP if the process set it — read - # by nICEr's nr_stealth_bridge to inject a synthetic srflx candidate - # matching the proxy egress IP. This avoids the StaticPref IPC - # propagation timing issue between parent and socket processes. - if _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP"): - env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"] + # WebRTC public IP for nICEr's nr_stealth_bridge to inject a synthetic + # srflx candidate matching the proxy egress (so WebRTC reports the proxy + # IP, not the host's). Prefer the IP auto-derived from the proxy egress + # at launch; otherwise honour a caller-set env var (which always wins, + # since we skip auto-derivation when it's present). + webrtc_ip = self._webrtc_ip or _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP") + if webrtc_ip: + env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = webrtc_ip + # Drop IPv6 host candidates (upstream disableIPv6 is dead in FF150). The + # env var is read directly in the socket process by our nICEr patch. + # Always on; a caller can pre-set it to "0" to opt out. + env.setdefault("STEALTHFOX_WEBRTC_DISABLE_IPV6", "1") return env def _resolve_headless(self) -> bool: diff --git a/src/invisible_playwright/prefs.py b/src/invisible_playwright/prefs.py index 4f0a15d..2803328 100644 --- a/src/invisible_playwright/prefs.py +++ b/src/invisible_playwright/prefs.py @@ -211,12 +211,15 @@ _BASELINE: Dict[str, Any] = { # WebRTC: enabled, no public IP leak. # obfuscate_host_addresses=false: our C++ injection handles candidate # selection; mDNS causes mDNS-IPC to hang in sandboxed content processes. - # disableIPv6=true keeps IPv6 out of gathering (less entropy, no IPv6 leak). + # IPv6 host candidates are dropped by our nICEr patch via + # zoom.stealth.webrtc.disable_ipv6 (+ env STEALTHFOX_WEBRTC_DISABLE_IPV6). + # The upstream media.peerconnection.ice.disableIPv6 pref is dead in FF150 + # (read nowhere in the ICE path) so it is NOT set here. "media.peerconnection.enabled": True, "media.peerconnection.ice.no_host": False, "media.peerconnection.ice.default_address_only": False, "media.peerconnection.ice.obfuscate_host_addresses": False, - "media.peerconnection.ice.disableIPv6": True, + "zoom.stealth.webrtc.disable_ipv6": True, "media.peerconnection.ice.proxy_only": False, "media.peerconnection.ice.relay_only": False, "media.peerconnection.use_document_iceservers": True, diff --git a/tests/test_geo.py b/tests/test_geo.py index 39ef5ee..c8a28e6 100644 --- a/tests/test_geo.py +++ b/tests/test_geo.py @@ -16,6 +16,7 @@ from invisible_playwright._geo import ( _proxy_is_set, discover_egress_ip, ip_to_timezone, + prepare_session_geo, resolve_session_timezone, ) @@ -286,3 +287,69 @@ def test_resolve_proxy_failure_raises(monkeypatch): resolve_session_timezone("auto", SOCKS) with pytest.raises(GeoTimezoneError): resolve_session_timezone("", SOCKS) + + +# ────────────────────────────────────────────────────────────────────── +# prepare_session_geo — timezone + WebRTC IP from ONE egress lookup +# ────────────────────────────────────────────────────────────────────── +@pytest.mark.unit +def test_geo_auto_proxy_sets_tz_and_webrtc(stub_egress): + g = prepare_session_geo("", SOCKS, want_webrtc=True) + assert g.timezone == "America/New_York" + assert g.webrtc_ip == "203.0.113.7" # proxy egress (IPv4) → synthetic srflx + + +@pytest.mark.unit +def test_geo_explicit_proxy_keeps_tz_but_still_sets_webrtc(stub_egress): + # an explicit zone doesn't resolve tz, but WebRTC still gets the proxy IP. + g = prepare_session_geo("Asia/Tokyo", SOCKS, want_webrtc=True) + assert g.timezone == "Asia/Tokyo" + assert g.webrtc_ip == "203.0.113.7" + + +@pytest.mark.unit +def test_geo_want_webrtc_false_skips_webrtc(stub_egress): + # caller already pinned the env var → don't auto-derive. + g = prepare_session_geo("auto", SOCKS, want_webrtc=False) + assert g.timezone == "America/New_York" + assert g.webrtc_ip is None + + +@pytest.mark.unit +def test_geo_no_proxy_no_webrtc(stub_egress): + g = prepare_session_geo("auto", None, want_webrtc=True) + assert g.timezone == "America/New_York" # resolved from host IP + assert g.webrtc_ip is None # no proxy → no synthetic srflx needed + + +@pytest.mark.unit +def test_geo_ipv6_egress_no_webrtc(monkeypatch): + monkeypatch.setattr(_geo, "discover_egress_ip", lambda *a, **k: "2001:db8::1") + monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "Europe/Berlin") + import invisible_playwright.download as dl + monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb") + g = prepare_session_geo("auto", SOCKS, want_webrtc=True) + assert g.timezone == "Europe/Berlin" + assert g.webrtc_ip is None # IPv6 not injected as a srflx candidate + + +@pytest.mark.unit +def test_geo_explicit_proxy_discovery_fail_is_best_effort(monkeypatch): + # explicit tz + proxy + discovery fails → keep tz, webrtc None, NO raise. + def boom(*a, **k): + raise GeoTimezoneError("down") + + monkeypatch.setattr(_geo, "discover_egress_ip", boom) + g = prepare_session_geo("Asia/Tokyo", SOCKS, want_webrtc=True) + assert g.timezone == "Asia/Tokyo" + assert g.webrtc_ip is None + + +@pytest.mark.unit +def test_geo_auto_proxy_discovery_fail_raises(monkeypatch): + def boom(*a, **k): + raise GeoTimezoneError("down") + + monkeypatch.setattr(_geo, "discover_egress_ip", boom) + with pytest.raises(GeoTimezoneError): + prepare_session_geo("auto", SOCKS, want_webrtc=True)