mirror of
https://github.com/feder-cr/invisible_playwright.git
synced 2026-06-07 08:35:12 +02:00
feat(webrtc): auto-derive the WebRTC public IP + suppress IPv6 from the proxy egress
Reuse the single timezone="auto" egress lookup to also make WebRTC coherent:
- prepare_session_geo() returns (timezone, webrtc_ip) from one discovery; resolve_session_timezone delegates to it (timezone-only).
- launcher/async _build_env set STEALTHFOX_WEBRTC_PUBLIC_IP to the proxy egress (IPv4, when the caller has not pinned it) so the srflx candidate matches the proxy, and STEALTHFOX_WEBRTC_DISABLE_IPV6=1 to drop the leaking IPv6 host candidate.
- prefs: baseline sets zoom.stealth.webrtc.disable_ipv6=true; the dead media.peerconnection.ice.disableIPv6 pref is removed.
- export prepare_session_geo / SessionGeo.

Needs a binary with the nICEr IPv6 patch to take full effect (env/pref are no-ops on firefox-7); the public-IP srflx already works on firefox-7.

tests: tests/test_geo.py prepare_session_geo combos; full unit suite 436 green plus live ICE verification against the patched build.
This commit is contained in:
parent
26fa962d24
commit
db1d3ec359
6 changed files with 202 additions and 43 deletions
|
|
@ -17,7 +17,12 @@ Quickstart:
|
|||
"""
|
||||
from .config import get_default_args, get_default_stealth_prefs
|
||||
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
|
||||
from ._geo import GeoTimezoneError, resolve_session_timezone
|
||||
from ._geo import (
|
||||
GeoTimezoneError,
|
||||
SessionGeo,
|
||||
prepare_session_geo,
|
||||
resolve_session_timezone,
|
||||
)
|
||||
from .download import ensure_binary, ensure_geoip_mmdb
|
||||
from .launcher import InvisiblePlaywright
|
||||
|
||||
|
|
@ -37,6 +42,8 @@ __all__ = [
|
|||
"get_default_stealth_prefs",
|
||||
"get_default_args",
|
||||
"resolve_session_timezone",
|
||||
"prepare_session_geo",
|
||||
"SessionGeo",
|
||||
"GeoTimezoneError",
|
||||
"BINARY_VERSION",
|
||||
"FIREFOX_UPSTREAM_VERSION",
|
||||
|
|
|
|||
|
|
@ -18,11 +18,16 @@ On failure:
|
|||
we fail loudly rather than fall back silently.
|
||||
without a proxy → "" (host) the host TZ is a safe default, so a transient
|
||||
lookup failure must not break the launch.
|
||||
|
||||
``prepare_session_geo`` reuses the SAME single egress lookup to also return the
|
||||
proxy egress IP for the WebRTC synthetic srflx candidate (consumed by the C++
|
||||
``nr_stealth_bridge`` via ``STEALTHFOX_WEBRTC_PUBLIC_IP``), so the WebRTC public
|
||||
IP matches the proxy instead of leaking the host IP.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, NamedTuple, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import requests
|
||||
|
|
@ -136,6 +141,70 @@ def ip_to_timezone(ip: str, mmdb_path: Any) -> str:
|
|||
return tz
|
||||
|
||||
|
||||
class SessionGeo(NamedTuple):
    """Session signals derived from the egress IP lookup.

    Returned by ``prepare_session_geo``; being a ``NamedTuple`` it can be read
    by attribute or unpacked as ``(timezone, webrtc_ip)``.
    """

    # Resolved IANA zone name; the empty string means "use the host TZ".
    timezone: str
    # Proxy egress IPv4 address for the synthetic srflx candidate, or None
    # when no WebRTC public IP was derived.
    webrtc_ip: Optional[str]
|
||||
|
||||
|
||||
def prepare_session_geo(
    timezone: str,
    proxy: Optional[Dict[str, str]],
    *,
    want_webrtc: bool = True,
) -> SessionGeo:
    """Derive the session timezone and the WebRTC public IP in one pass.

    At most one proxy egress discovery is performed and its result feeds both
    outputs.

    Args:
        timezone: An explicit zone name (returned stripped, as-is), or
            ``""`` / ``"auto"`` (case-insensitive) to resolve the zone from
            the egress IP.
        proxy: Proxy settings; whether a proxy counts as "set" is decided by
            ``_proxy_is_set``.
        want_webrtc: When true and a proxy is set, also report the proxy
            egress IP (IPv4 only) for the WebRTC srflx candidate. Pass
            ``False`` if the caller already pinned
            ``STEALTHFOX_WEBRTC_PUBLIC_IP``.

    Returns:
        A ``SessionGeo`` whose ``timezone`` is the resolved zone (``""`` when
        the no-proxy lookup failed, i.e. host TZ) and whose ``webrtc_ip`` is
        the proxy egress IPv4 address or ``None``.

    Raises:
        Exception: With a proxy and an auto timezone, the error raised by the
            egress discovery is re-raised (or ``GeoTimezoneError`` if the
            discovery yielded ``None``); errors from the IP-to-zone lookup
            also propagate in that case.
    """
    from .download import ensure_geoip_mmdb

    requested = (timezone or "").strip()
    is_auto = not requested or requested.lower() == "auto"
    behind_proxy = _proxy_is_set(proxy)

    # Single proxy egress discovery, needed for tz-auto and/or the WebRTC IP.
    # A failure is remembered rather than raised: it is only fatal for tz-auto.
    proxy_ip: Optional[str] = None
    lookup_failure: Optional[Exception] = None
    if behind_proxy and (is_auto or want_webrtc):
        try:
            proxy_ip = discover_egress_ip(proxy)
        except Exception as err:  # noqa: BLE001
            lookup_failure = err

    # ── timezone ──
    if not is_auto:
        # An explicit zone always wins; no lookup is needed for it.
        resolved_tz = requested
    elif not behind_proxy:
        # No proxy → host public IP; a transient failure falls back to host TZ.
        try:
            resolved_tz = ip_to_timezone(discover_egress_ip(None), ensure_geoip_mmdb())
        except Exception:  # noqa: BLE001
            resolved_tz = ""
    else:
        # Auto behind a proxy must resolve — fail early (timezone_mismatch).
        if proxy_ip is None:
            raise lookup_failure or GeoTimezoneError("could not resolve egress timezone")
        resolved_tz = ip_to_timezone(proxy_ip, ensure_geoip_mmdb())

    # ── webrtc ── only with a proxy, a successful discovery, and an IPv4 egress
    srflx_ip: Optional[str] = None
    if behind_proxy and want_webrtc and proxy_ip:
        try:
            parsed = ipaddress.ip_address(proxy_ip)
        except ValueError:
            # Unparseable egress value: best-effort, leave the WebRTC IP unset.
            parsed = None
        if parsed is not None and parsed.version == 4:
            srflx_ip = proxy_ip

    return SessionGeo(timezone=resolved_tz, webrtc_ip=srflx_ip)
|
||||
|
||||
|
||||
def resolve_session_timezone(
|
||||
timezone: str, proxy: Optional[Dict[str, str]]
|
||||
) -> str:
|
||||
|
|
@ -146,19 +215,7 @@ def resolve_session_timezone(
|
|||
host's own public IP). On failure: with a proxy we raise
|
||||
:class:`GeoTimezoneError` (never silently use the host TZ behind a foreign
|
||||
proxy); without a proxy we fall back to ``""`` (host TZ) so a transient
|
||||
lookup failure can't break the launch.
|
||||
lookup failure can't break the launch. WebRTC is left to the caller — use
|
||||
:func:`prepare_session_geo` to resolve both from a single lookup.
|
||||
"""
|
||||
tz = (timezone or "").strip()
|
||||
if tz and tz.lower() != "auto":
|
||||
return tz # explicit IANA wins
|
||||
# "" or "auto" → always resolve from the egress IP.
|
||||
from .download import ensure_geoip_mmdb
|
||||
|
||||
proxy_set = _proxy_is_set(proxy)
|
||||
try:
|
||||
ip = discover_egress_ip(proxy if proxy_set else None)
|
||||
return ip_to_timezone(ip, ensure_geoip_mmdb())
|
||||
except Exception:
|
||||
if proxy_set:
|
||||
raise # fail-early behind a proxy (timezone_mismatch trap)
|
||||
return "" # no proxy: host TZ is a safe fallback
|
||||
return prepare_session_geo(timezone, proxy, want_webrtc=False).timezone
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from typing import Any, Dict, Optional, Union
|
|||
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
|
||||
|
||||
from ._fpforge import Profile, generate_profile
|
||||
from ._geo import resolve_session_timezone
|
||||
from ._geo import prepare_session_geo
|
||||
from ._headless import make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from .download import ensure_binary
|
||||
|
|
@ -63,6 +63,7 @@ class InvisiblePlaywright:
|
|||
self._humanize = humanize
|
||||
self._locale = locale
|
||||
self._timezone = timezone
|
||||
self._webrtc_ip: Optional[str] = None # auto-set from proxy egress at launch
|
||||
self._extra_prefs = extra_prefs
|
||||
self._binary_path = binary_path
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
|
|
@ -75,14 +76,18 @@ class InvisiblePlaywright:
|
|||
self._virtual_display: Any = None
|
||||
|
||||
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
||||
import os as _os
|
||||
import sys as _sys
|
||||
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||
# concrete IANA zone before anything reads self._timezone. Run the
|
||||
# blocking geo lookup off the event loop. Fail-early if a proxy is set
|
||||
# but the egress zone can't be resolved.
|
||||
self._timezone = await asyncio.to_thread(
|
||||
resolve_session_timezone, self._timezone, self._proxy
|
||||
# Resolve timezone="auto" AND the WebRTC public IP from one egress lookup
|
||||
# before anything reads them. Run the blocking geo lookup off the event
|
||||
# loop. Fail-early if a proxy is set but the egress zone can't resolve;
|
||||
# don't auto-set the WebRTC IP if the caller already pinned the env var.
|
||||
want_webrtc = not _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP")
|
||||
geo = await asyncio.to_thread(
|
||||
prepare_session_geo, self._timezone, self._proxy, want_webrtc=want_webrtc,
|
||||
)
|
||||
self._timezone = geo.timezone
|
||||
self._webrtc_ip = geo.webrtc_ip
|
||||
executable = self._binary_path or ensure_binary()
|
||||
prefs = translate_profile_to_prefs(
|
||||
self._profile,
|
||||
|
|
@ -203,12 +208,18 @@ class InvisiblePlaywright:
|
|||
env = _os.environ.copy()
|
||||
if self._timezone:
|
||||
env["TZ"] = _tz_env(self._timezone)
|
||||
# Propagate STEALTHFOX_WEBRTC_PUBLIC_IP if the process set it — read
|
||||
# by nICEr's nr_stealth_bridge to inject a synthetic srflx candidate
|
||||
# matching the proxy egress IP. This avoids the StaticPref IPC
|
||||
# propagation timing issue between parent and socket processes.
|
||||
if _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP"):
|
||||
env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"]
|
||||
# WebRTC public IP for nICEr's nr_stealth_bridge synthetic srflx (so
|
||||
# WebRTC reports the proxy egress, not the host IP). Prefer the IP
|
||||
# auto-derived from the proxy egress at launch; otherwise honour a
|
||||
# caller-set env var (which always wins, since auto-derivation is
|
||||
# skipped when it's present).
|
||||
webrtc_ip = self._webrtc_ip or _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP")
|
||||
if webrtc_ip:
|
||||
env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = webrtc_ip
|
||||
# Drop IPv6 host candidates (upstream disableIPv6 is dead in FF150). The
|
||||
# env var is read directly in the socket process by our nICEr patch.
|
||||
# Always on; a caller can pre-set it to "0" to opt out.
|
||||
env.setdefault("STEALTHFOX_WEBRTC_DISABLE_IPV6", "1")
|
||||
return env
|
||||
|
||||
def _resolve_headless(self) -> bool:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from typing import Any, Dict, Optional, Union
|
|||
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
|
||||
|
||||
from ._fpforge import Profile, generate_profile
|
||||
from ._geo import resolve_session_timezone
|
||||
from ._geo import prepare_session_geo
|
||||
from ._headless import make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from .download import ensure_binary
|
||||
|
|
@ -171,6 +171,7 @@ class InvisiblePlaywright:
|
|||
self._humanize = humanize
|
||||
self._locale = locale
|
||||
self._timezone = timezone
|
||||
self._webrtc_ip: Optional[str] = None # auto-set from proxy egress at launch
|
||||
self._extra_prefs = extra_prefs
|
||||
self._binary_path = binary_path
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
|
|
@ -185,10 +186,17 @@ class InvisiblePlaywright:
|
|||
self._virtual_display: Any = None
|
||||
|
||||
def __enter__(self) -> Union[Browser, BrowserContext]:
|
||||
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
|
||||
# concrete IANA zone before anything reads self._timezone. Fail-early
|
||||
# if a proxy is set but the egress zone can't be resolved.
|
||||
self._timezone = resolve_session_timezone(self._timezone, self._proxy)
|
||||
# Resolve timezone="auto" AND the WebRTC public IP from one egress
|
||||
# lookup before anything reads them. Fail-early if a proxy is set but
|
||||
# the egress zone can't be resolved. Don't auto-set the WebRTC IP if the
|
||||
# caller already pinned STEALTHFOX_WEBRTC_PUBLIC_IP.
|
||||
import os as _os
|
||||
geo = prepare_session_geo(
|
||||
self._timezone, self._proxy,
|
||||
want_webrtc=not _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP"),
|
||||
)
|
||||
self._timezone = geo.timezone
|
||||
self._webrtc_ip = geo.webrtc_ip
|
||||
executable = self._binary_path or ensure_binary()
|
||||
prefs = self._build_prefs()
|
||||
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)
|
||||
|
|
@ -354,12 +362,18 @@ class InvisiblePlaywright:
|
|||
env = _os.environ.copy()
|
||||
if self._timezone:
|
||||
env["TZ"] = _tz_env(self._timezone)
|
||||
# Propagate STEALTHFOX_WEBRTC_PUBLIC_IP if the process set it — read
|
||||
# by nICEr's nr_stealth_bridge to inject a synthetic srflx candidate
|
||||
# matching the proxy egress IP. This avoids the StaticPref IPC
|
||||
# propagation timing issue between parent and socket processes.
|
||||
if _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP"):
|
||||
env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = _os.environ["STEALTHFOX_WEBRTC_PUBLIC_IP"]
|
||||
# WebRTC public IP for nICEr's nr_stealth_bridge to inject a synthetic
|
||||
# srflx candidate matching the proxy egress (so WebRTC reports the proxy
|
||||
# IP, not the host's). Prefer the IP auto-derived from the proxy egress
|
||||
# at launch; otherwise honour a caller-set env var (which always wins,
|
||||
# since we skip auto-derivation when it's present).
|
||||
webrtc_ip = self._webrtc_ip or _os.environ.get("STEALTHFOX_WEBRTC_PUBLIC_IP")
|
||||
if webrtc_ip:
|
||||
env["STEALTHFOX_WEBRTC_PUBLIC_IP"] = webrtc_ip
|
||||
# Drop IPv6 host candidates (upstream disableIPv6 is dead in FF150). The
|
||||
# env var is read directly in the socket process by our nICEr patch.
|
||||
# Always on; a caller can pre-set it to "0" to opt out.
|
||||
env.setdefault("STEALTHFOX_WEBRTC_DISABLE_IPV6", "1")
|
||||
return env
|
||||
|
||||
def _resolve_headless(self) -> bool:
|
||||
|
|
|
|||
|
|
@ -211,12 +211,15 @@ _BASELINE: Dict[str, Any] = {
|
|||
# WebRTC: enabled, no public IP leak.
|
||||
# obfuscate_host_addresses=false: our C++ injection handles candidate
|
||||
# selection; mDNS causes mDNS-IPC to hang in sandboxed content processes.
|
||||
# disableIPv6=true keeps IPv6 out of gathering (less entropy, no IPv6 leak).
|
||||
# IPv6 host candidates are dropped by our nICEr patch via
|
||||
# zoom.stealth.webrtc.disable_ipv6 (+ env STEALTHFOX_WEBRTC_DISABLE_IPV6).
|
||||
# The upstream media.peerconnection.ice.disableIPv6 pref is dead in FF150
|
||||
# (read nowhere in the ICE path) so it is NOT set here.
|
||||
"media.peerconnection.enabled": True,
|
||||
"media.peerconnection.ice.no_host": False,
|
||||
"media.peerconnection.ice.default_address_only": False,
|
||||
"media.peerconnection.ice.obfuscate_host_addresses": False,
|
||||
"media.peerconnection.ice.disableIPv6": True,
|
||||
"zoom.stealth.webrtc.disable_ipv6": True,
|
||||
"media.peerconnection.ice.proxy_only": False,
|
||||
"media.peerconnection.ice.relay_only": False,
|
||||
"media.peerconnection.use_document_iceservers": True,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from invisible_playwright._geo import (
|
|||
_proxy_is_set,
|
||||
discover_egress_ip,
|
||||
ip_to_timezone,
|
||||
prepare_session_geo,
|
||||
resolve_session_timezone,
|
||||
)
|
||||
|
||||
|
|
@ -286,3 +287,69 @@ def test_resolve_proxy_failure_raises(monkeypatch):
|
|||
resolve_session_timezone("auto", SOCKS)
|
||||
with pytest.raises(GeoTimezoneError):
|
||||
resolve_session_timezone("", SOCKS)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# prepare_session_geo — timezone + WebRTC IP from ONE egress lookup
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
@pytest.mark.unit
def test_geo_auto_proxy_sets_tz_and_webrtc(stub_egress):
    """Unset timezone behind a proxy: both the zone and the WebRTC IP are set."""
    session = prepare_session_geo("", SOCKS, want_webrtc=True)

    assert session.timezone == "America/New_York"
    # The proxy egress (IPv4) is what feeds the synthetic srflx candidate.
    assert session.webrtc_ip == "203.0.113.7"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_geo_explicit_proxy_keeps_tz_but_still_sets_webrtc(stub_egress):
    """An explicit zone is kept untouched while WebRTC still gets the proxy IP."""
    session = prepare_session_geo("Asia/Tokyo", SOCKS, want_webrtc=True)

    # The explicit zone is returned as given, not resolved from the egress.
    assert session.timezone == "Asia/Tokyo"
    assert session.webrtc_ip == "203.0.113.7"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_geo_want_webrtc_false_skips_webrtc(stub_egress):
    """With ``want_webrtc=False`` the zone resolves but no WebRTC IP is derived."""
    # Models a caller that already pinned the env var → don't auto-derive.
    session = prepare_session_geo("auto", SOCKS, want_webrtc=False)

    assert session.timezone == "America/New_York"
    assert session.webrtc_ip is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_geo_no_proxy_no_webrtc(stub_egress):
    """Without a proxy the zone comes from the host IP and no WebRTC IP is set."""
    session = prepare_session_geo("auto", None, want_webrtc=True)

    # Resolved from the host IP.
    assert session.timezone == "America/New_York"
    # No proxy → no synthetic srflx needed.
    assert session.webrtc_ip is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_geo_ipv6_egress_no_webrtc(monkeypatch):
    """An IPv6 proxy egress still resolves the zone but yields no WebRTC IP."""
    import invisible_playwright.download as download_mod

    # Stub the egress discovery, the zone lookup, and the mmdb fetch.
    monkeypatch.setattr(_geo, "discover_egress_ip", lambda *a, **k: "2001:db8::1")
    monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "Europe/Berlin")
    monkeypatch.setattr(download_mod, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb")

    session = prepare_session_geo("auto", SOCKS, want_webrtc=True)

    assert session.timezone == "Europe/Berlin"
    # IPv6 is not injected as a srflx candidate.
    assert session.webrtc_ip is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_geo_explicit_proxy_discovery_fail_is_best_effort(monkeypatch):
    """Explicit tz + proxy + failed discovery: keep tz, no WebRTC IP, no raise."""

    def _lookup_down(*args, **kwargs):
        raise GeoTimezoneError("down")

    monkeypatch.setattr(_geo, "discover_egress_ip", _lookup_down)

    session = prepare_session_geo("Asia/Tokyo", SOCKS, want_webrtc=True)

    assert session.timezone == "Asia/Tokyo"
    assert session.webrtc_ip is None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_geo_auto_proxy_discovery_fail_raises(monkeypatch):
    """Auto tz behind a proxy must fail loudly when the egress lookup fails."""

    def _lookup_down(*args, **kwargs):
        raise GeoTimezoneError("down")

    monkeypatch.setattr(_geo, "discover_egress_ip", _lookup_down)

    with pytest.raises(GeoTimezoneError):
        prepare_session_geo("auto", SOCKS, want_webrtc=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue