feat: timezone="auto" derives the zone from the proxy egress IP

A proxy in a different country paired with the host timezone is the
classic timezone_mismatch signal, so a session with a proxy and no
explicit timezone now resolves the zone automatically.

- discover the egress IP through the proxy (SOCKS via requests[socks]),
  map it to an IANA zone with an offline mmdb (daijro/geoip-all-in-one,
  downloaded + cached like the Firefox binary; GPL so not vendored)
- precedence: explicit zone wins; ""+proxy and "auto"+proxy resolve;
  ""/"auto" without a proxy stay host; "host"/"local" force host TZ
- fail-early when a proxy is set but the zone cannot be resolved, never
  a silent host-TZ fallback
- deps: requests[socks], maxminddb, tzdata (zoneinfo ships no DB on Windows)
- resolve_session_timezone / ensure_geoip_mmdb exported for integrations
This commit is contained in:
feder-cr 2026-06-06 04:16:22 +02:00
parent 143aff4bd2
commit d6c3de7730
9 changed files with 537 additions and 2 deletions

View file

@ -141,6 +141,33 @@ with InvisiblePlaywright(proxy=proxy) as browser:
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
### Timezone
The browser timezone follows `timezone=`:
```python
# default: with a proxy, the timezone is auto-derived from the proxy egress IP
with InvisiblePlaywright(proxy=proxy) as browser:
...
# explicit IANA zone always wins
with InvisiblePlaywright(proxy=proxy, timezone="America/New_York") as browser:
...
# opt out and keep the host timezone even behind a proxy
with InvisiblePlaywright(proxy=proxy, timezone="host") as browser:
...
```
| `timezone=` | with proxy | without proxy |
|---|---|---|
| `""` (default) | auto-derived from egress IP | host timezone |
| `"auto"` | auto-derived from egress IP | host timezone |
| `"Area/City"` | that zone | that zone |
| `"host"` / `"local"` | host timezone | host timezone |
A proxy in a different country paired with the host timezone is the classic `timezone_mismatch` signal, so a proxy with no explicit timezone now resolves automatically. The egress IP is looked up through the proxy and mapped to its IANA zone with an offline database ([`daijro/geoip-all-in-one`](https://github.com/daijro/geoip-all-in-one)), downloaded and cached on first use. If a proxy is set but the zone can't be resolved, the launch raises rather than silently falling back to the host zone — pass an explicit `timezone=` or `timezone="host"` to override. Point `STEALTHFOX_GEOIP_MMDB` at your own `.mmdb` to skip the download.
### Pinning specific fingerprint fields
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:

View file

@ -22,7 +22,9 @@ classifiers = [
dependencies = [
"playwright>=1.40",
"platformdirs>=4",
"requests>=2.31",
"requests[socks]>=2.31",
"maxminddb>=2.2",
"tzdata>=2024.1",
"tqdm>=4.66",
"pywin32>=306; sys_platform == 'win32'",
]

View file

@ -17,7 +17,8 @@ Quickstart:
"""
from .config import get_default_args, get_default_stealth_prefs
from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION
from .download import ensure_binary
from ._geo import GeoTimezoneError, resolve_session_timezone
from .download import ensure_binary, ensure_geoip_mmdb
from .launcher import InvisiblePlaywright
from importlib.metadata import PackageNotFoundError, version as _pkg_version
@ -32,8 +33,11 @@ except PackageNotFoundError:
__all__ = [
"InvisiblePlaywright",
"ensure_binary",
"ensure_geoip_mmdb",
"get_default_stealth_prefs",
"get_default_args",
"resolve_session_timezone",
"GeoTimezoneError",
"BINARY_VERSION",
"FIREFOX_UPSTREAM_VERSION",
"__version__",

View file

@ -0,0 +1,160 @@
"""Resolve the session timezone from the proxy egress IP (``timezone="auto"``).
Approach B: discover the egress IP with one HTTP request routed *through the
configured proxy*, then map IP → IANA timezone with an offline mmdb
(``daijro/geoip-all-in-one``, downloaded + cached by ``download.py``).
Precedence (see ``resolve_session_timezone``):
    "host" / "local"   → ""         force host TZ (escape hatch)
    explicit IANA      → unchanged  explicit always wins
    "" + no proxy      → ""         host TZ (default, unchanged behaviour)
    "" + proxy         → egress     NEW default: a proxy with no timezone is
                                    exactly the timezone_mismatch trap, so we
                                    auto-resolve it.
    "auto" + no proxy  → ""         nothing to resolve, fall back to host TZ
    "auto" + proxy     → egress
When a proxy IS set we fail loudly rather than silently fall back to the host
TZ — a foreign proxy paired with the host timezone is the precise signal
detectors flag as ``timezone_mismatch``.
"""
from __future__ import annotations
import ipaddress
from typing import Any, Dict, Optional
from urllib.parse import quote
import requests
class GeoTimezoneError(RuntimeError):
    """Signal that the proxy egress timezone could not be resolved.

    Raised by the ``timezone="auto"`` machinery when the egress IP cannot be
    discovered or cannot be mapped to a valid IANA zone.
    """
# Plain-text IP echo endpoints (each returns just the caller's public IP).
# ``discover_egress_ip`` tries them in this order and stops at the first
# response whose body parses as an IP address.
_IP_ECHO_ENDPOINTS = (
    "https://api.ipify.org",
    "https://icanhazip.com",
    "https://checkip.amazonaws.com",
)
# URL prefixes that denote a SOCKS proxy server.
# NOTE(review): not referenced by the code visible in this module — confirm it
# is still needed.
_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://")
def _proxy_is_set(proxy: Optional[Dict[str, str]]) -> bool:
    """Tell whether ``proxy`` names a real proxy server.

    A missing or empty mapping, a blank ``server`` entry, and the
    ``direct://`` pseudo-server (in any letter case) all count as "no proxy".
    """
    if proxy:
        endpoint = (proxy.get("server") or "").strip()
        if endpoint:
            return endpoint.lower() != "direct://"
    return False
def _proxies_for_requests(proxy: Dict[str, str]) -> Dict[str, str]:
    """Build the ``proxies`` mapping that ``requests`` expects from our proxy dict.

    Scheme mapping (matched case-insensitively on the ``server`` prefix):
    ``socks5://`` and ``socks://`` become ``socks5h`` so DNS is resolved
    proxy-side (matches ``network.proxy.socks_remote_dns=True`` in the Firefox
    path), ``socks4://`` stays ``socks4``, ``https://`` stays ``https`` and
    anything else (including a bare ``host:port``) is treated as ``http``.

    When a ``username`` is present, it and the ``password`` are percent-encoded
    and embedded in the URL. The same URL is returned for both the ``http``
    and ``https`` keys.
    """
    endpoint = (proxy.get("server") or "").strip()
    lowered = endpoint.lower()
    if lowered.startswith(("socks5://", "socks://")):
        scheme = "socks5h"
    elif lowered.startswith("socks4://"):
        scheme = "socks4"
    elif lowered.startswith("https://"):
        scheme = "https"
    else:
        scheme = "http"

    # Keep only what follows the first "://" (the whole string if there is none).
    _, separator, remainder = endpoint.partition("://")
    host_port = remainder if separator else endpoint

    username = proxy.get("username") or ""
    password = proxy.get("password") or ""
    credentials = ""
    if username:
        # safe='' so that '@', ':' and '/' inside credentials are escaped too.
        credentials = f"{quote(username, safe='')}:{quote(password, safe='')}@"

    proxy_url = f"{scheme}://{credentials}{host_port}"
    return {"http": proxy_url, "https": proxy_url}
def discover_egress_ip(
    proxy: Dict[str, str], *, timeout: float = 10.0
) -> str:
    """Return the public IP seen when routing through ``proxy``.

    Each endpoint in ``_IP_ECHO_ENDPOINTS`` is queried in turn through the
    proxy; the first response whose (stripped) body parses as an IP address
    is returned.

    Args:
        proxy: Proxy mapping (``server`` plus optional ``username`` /
            ``password``), translated by ``_proxies_for_requests``.
        timeout: Timeout passed to each ``requests.get`` call.

    Returns:
        The egress IP address as a string.

    Raises:
        GeoTimezoneError: If no endpoint returns a valid IP (SOCKS support
            requires ``requests[socks]`` / PySocks). The last underlying
            failure is chained as ``__cause__`` so its traceback is kept.
    """
    proxies = _proxies_for_requests(proxy)
    last_err: Optional[Exception] = None
    for url in _IP_ECHO_ENDPOINTS:
        try:
            resp = requests.get(url, proxies=proxies, timeout=timeout)
            resp.raise_for_status()
            ip = resp.text.strip()
            ipaddress.ip_address(ip)  # validate (raises ValueError if not an IP)
            return ip
        except Exception as exc:  # noqa: BLE001 - best effort: try the next endpoint
            last_err = exc
    raise GeoTimezoneError(
        f"could not discover the proxy egress IP via {len(_IP_ECHO_ENDPOINTS)} "
        f"endpoints (last error: {last_err!r}). For SOCKS proxies make sure "
        f"requests[socks] / PySocks is installed."
    ) from last_err
def ip_to_timezone(ip: str, mmdb_path: Any) -> str:
    """Look up the IANA timezone recorded for ``ip`` in the offline mmdb.

    The zone is read from the record's ``location.time_zone`` field and then
    validated by constructing a ``zoneinfo.ZoneInfo`` from it.

    Args:
        ip: IP address to look up.
        mmdb_path: Location of the mmdb file (converted with ``str``).

    Returns:
        The IANA zone name exactly as stored in the database.

    Raises:
        GeoTimezoneError: If the IP has no record, the record carries no
            timezone, or the timezone is not a valid IANA name.
    """
    import maxminddb

    with maxminddb.open_database(str(mmdb_path)) as reader:
        record = reader.get(ip)

    if not record:
        raise GeoTimezoneError(f"egress IP {ip} not present in the geoip database")

    # Non-dict records and records without a "location" entry carry no zone.
    location = (record.get("location") or {}) if isinstance(record, dict) else {}
    zone_name = location.get("time_zone")
    if not zone_name:
        raise GeoTimezoneError(f"no timezone for egress IP {ip} in the geoip database")

    from zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    try:
        ZoneInfo(zone_name)
    except (ZoneInfoNotFoundError, ValueError) as exc:
        raise GeoTimezoneError(
            f"geoip returned an invalid IANA zone {zone_name!r} for {ip}: {exc}"
        ) from exc
    return zone_name
def resolve_session_timezone(
    timezone: str, proxy: Optional[Dict[str, str]]
) -> str:
    """Turn the user's ``timezone`` setting into a concrete IANA zone (or ``""``).

    Policy, in order:

    * ``"host"`` / ``"local"`` (any case) return ``""``, i.e. the host timezone.
    * Any other non-empty value except ``"auto"`` is returned stripped, as an
      explicit zone.
    * ``""`` / ``"auto"`` without a usable proxy return ``""``.
    * ``""`` / ``"auto"`` with a proxy resolve the zone from the proxy egress IP.

    Raises:
        GeoTimezoneError: When a proxy is set but the egress timezone cannot be
            resolved (fail-early — never silently use the host TZ behind a
            foreign proxy).
    """
    requested = (timezone or "").strip()
    keyword = requested.lower()

    if keyword in ("host", "local"):
        return ""
    if requested and keyword != "auto":
        return requested  # explicit IANA wins
    if not _proxy_is_set(proxy):
        return ""  # "" / "auto" without a proxy → host TZ

    # Proxy set and the setting is "" (new default) or "auto": resolve from egress.
    assert proxy is not None
    # Imported at call time rather than at module import.
    from .download import ensure_geoip_mmdb

    egress_ip = discover_egress_ip(proxy)
    mmdb_file = ensure_geoip_mmdb()
    return ip_to_timezone(egress_ip, mmdb_file)

View file

@ -9,6 +9,7 @@ from typing import Any, Dict, Optional, Union
from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright
from ._fpforge import Profile, generate_profile
from ._geo import resolve_session_timezone
from ._headless import make_virtual_display
from ._proxy import configure_proxy as _configure_proxy_shared
from .download import ensure_binary
@ -75,6 +76,13 @@ class InvisiblePlaywright:
async def __aenter__(self) -> Union[Browser, BrowserContext]:
import sys as _sys
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
# concrete IANA zone before anything reads self._timezone. Run the
# blocking geo lookup off the event loop. Fail-early if a proxy is set
# but the egress zone can't be resolved.
self._timezone = await asyncio.to_thread(
resolve_session_timezone, self._timezone, self._proxy
)
executable = self._binary_path or ensure_binary()
prefs = translate_profile_to_prefs(
self._profile,

View file

@ -46,3 +46,19 @@ BINARY_ENTRY_REL = {
RELEASE_URL_TEMPLATE = (
"https://github.com/feder-cr/invisible_playwright/releases/download/{tag}/{asset}"
)
# ─────────────────────────────────────────────────────────────────────────
# GeoIP database (timezone="auto" → resolve IANA zone from proxy egress IP)
# ─────────────────────────────────────────────────────────────────────────
# daijro/geoip-all-in-one merges IP2Location LITE + GeoLite2 + DB-IP into a
# single mmdb (country ISO + coordinates + IANA timezone via tzfpy), rebuilt
# weekly. GPL-3.0, so we DOWNLOAD it at runtime into the user cache (like the
# Firefox binary) rather than bundling it into this MIT package. Pinned to a
# known-good weekly tag; bump to refresh. The `-all` variant covers IPv4+IPv6.
# GitHub ``owner/name`` of the repository that publishes the database.
GEOIP_REPO: str = "daijro/geoip-all-in-one"
# Pinned release tag; substituted for ``{tag}`` in the URL template below.
GEOIP_MMDB_VERSION: str = "2026.06.03"
# Name of the release asset to download; substituted for ``{asset}``.
GEOIP_ASSET: str = "geoip-aio-all.mmdb.zip"
# File name under which the extracted database is expected in the cache.
GEOIP_MMDB_NAME: str = "geoip-aio-all.mmdb"
# Download URL pattern for a release asset; format with ``tag`` and ``asset``.
GEOIP_RELEASE_URL_TEMPLATE: str = (
    "https://github.com/daijro/geoip-all-in-one/releases/download/{tag}/{asset}"
)

View file

@ -18,6 +18,10 @@ from .constants import (
ARCHIVE_NAME,
BINARY_ENTRY_REL,
BINARY_VERSION,
GEOIP_ASSET,
GEOIP_MMDB_NAME,
GEOIP_MMDB_VERSION,
GEOIP_RELEASE_URL_TEMPLATE,
RELEASE_URL_TEMPLATE,
)
@ -151,3 +155,49 @@ def ensure_binary(version: str = BINARY_VERSION) -> Path:
if not entry.exists():
raise RuntimeError(f"binary not found after extraction: {entry}")
return entry
# ─────────────────────────────────────────────────────────────────────────
# GeoIP mmdb (used by timezone="auto" to map proxy egress IP → IANA zone)
# ─────────────────────────────────────────────────────────────────────────
def geoip_mmdb_path(version: str = GEOIP_MMDB_VERSION) -> Path:
    """Return where the extracted geoip mmdb for ``version`` lives in the cache.

    The result is ``<cache_root>/geoip/<version>/<GEOIP_MMDB_NAME>``; this
    function only builds the path and creates nothing on disk.
    """
    version_dir = cache_root() / "geoip" / version
    return version_dir / GEOIP_MMDB_NAME
def ensure_geoip_mmdb(version: str = GEOIP_MMDB_VERSION) -> Path:
    """Return a path to the geoip mmdb, downloading + caching it if needed.

    Set ``STEALTHFOX_GEOIP_MMDB`` to point at a user-supplied mmdb (or a test
    fixture) to skip the download entirely. Otherwise the pinned weekly build
    of ``daijro/geoip-all-in-one`` is fetched from GitHub Releases (public, no
    token) into the user cache and unzipped once.

    Args:
        version: Release tag to fetch; also selects the cache sub-directory.

    Returns:
        Path to the mmdb file: the override path when the environment variable
        is set, otherwise ``geoip_mmdb_path(version)``.

    Raises:
        RuntimeError: If the override points to a missing file, or no
            ``.mmdb`` is found in the cache directory after extraction.
    """
    override = os.environ.get("STEALTHFOX_GEOIP_MMDB")
    if override:
        p = Path(override)
        if not p.exists():
            raise RuntimeError(
                f"STEALTHFOX_GEOIP_MMDB points to a missing file: {p}"
            )
        return p

    dst = geoip_mmdb_path(version)
    if dst.exists():
        return dst

    url = GEOIP_RELEASE_URL_TEMPLATE.format(tag=version, asset=GEOIP_ASSET)
    dst.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as td:
        archive = Path(td) / GEOIP_ASSET
        _download_file(url, archive)
        # Extract while the temporary archive still exists.
        _extract(archive, dst.parent)
    if dst.exists():
        return dst

    # The asset name inside the zip may differ from GEOIP_MMDB_NAME — fall
    # back to the first .mmdb the archive produced. Move it to the canonical
    # cache path so the ``dst.exists()`` check above succeeds on the next call
    # instead of triggering a fresh download every time.
    candidates = sorted(dst.parent.glob("*.mmdb"))
    if candidates:
        candidates[0].replace(dst)
        return dst
    raise RuntimeError(f"geoip mmdb not found after extraction in {dst.parent}")

View file

@ -8,6 +8,7 @@ from typing import Any, Dict, Optional, Union
from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright
from ._fpforge import Profile, generate_profile
from ._geo import resolve_session_timezone
from ._headless import make_virtual_display
from ._proxy import configure_proxy as _configure_proxy_shared
from .download import ensure_binary
@ -178,6 +179,10 @@ class InvisiblePlaywright:
self._virtual_display: Any = None
def __enter__(self) -> Union[Browser, BrowserContext]:
# Resolve timezone="auto" (and the proxy-set-but-unset default) to a
# concrete IANA zone before anything reads self._timezone. Fail-early
# if a proxy is set but the egress zone can't be resolved.
self._timezone = resolve_session_timezone(self._timezone, self._proxy)
executable = self._binary_path or ensure_binary()
prefs = self._build_prefs()
playwright_proxy = _configure_proxy_shared(self._proxy, prefs)

263
tests/test_geo.py Normal file
View file

@ -0,0 +1,263 @@
"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution).
Covers: the precedence policy (resolve_session_timezone), proxy → requests
translation, egress IP discovery (mocked HTTP), and IP → IANA mapping (mocked
mmdb). No real network or mmdb is touched.
"""
import sys
import types
import pytest
from invisible_playwright import _geo
from invisible_playwright._geo import (
GeoTimezoneError,
_proxies_for_requests,
_proxy_is_set,
discover_egress_ip,
ip_to_timezone,
resolve_session_timezone,
)
# Shared proxy mappings used across the tests: the same host and credentials
# expressed once as a SOCKS5 proxy and once as a plain-HTTP proxy.
SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"}
HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"}
# ──────────────────────────────────────────────────────────────────────
# _proxy_is_set
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
@pytest.mark.parametrize(
    "proxy,expected",
    [
        (None, False),
        ({}, False),
        ({"server": ""}, False),
        ({"server": "   "}, False),
        ({"server": "direct://"}, False),
        ({"server": "DIRECT://"}, False),
        ({"server": "socks5://h:1"}, True),
        ({"server": "http://h:8080"}, True),
    ],
)
def test_proxy_is_set(proxy, expected):
    """Missing, blank and ``direct://`` servers do not count as a proxy."""
    outcome = _proxy_is_set(proxy)
    assert outcome is expected
# ──────────────────────────────────────────────────────────────────────
# _proxies_for_requests — scheme + credential translation
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_proxies_socks5_uses_socks5h_remote_dns():
    """A ``socks5://`` server is rewritten to ``socks5h://`` for both keys."""
    mapping = _proxies_for_requests(SOCKS)
    assert mapping["http"] == "socks5h://u:p@gw.example:1080"
    assert mapping["https"] == mapping["http"]
@pytest.mark.unit
def test_proxies_socks4_scheme():
    """A ``socks4://`` server keeps the ``socks4`` scheme."""
    mapping = _proxies_for_requests({"server": "socks4://gw:1080"})
    assert mapping["http"] == "socks4://gw:1080"
@pytest.mark.unit
def test_proxies_http_and_https_schemes():
    """``http://`` and ``https://`` servers keep their own scheme."""
    http_mapping = _proxies_for_requests(HTTP)
    assert http_mapping["http"] == "http://u:p@gw.example:8080"

    https_mapping = _proxies_for_requests({"server": "https://gw:8443"})
    assert https_mapping["https"] == "https://gw:8443"
@pytest.mark.unit
def test_proxies_no_scheme_defaults_to_http():
    """A bare ``host:port`` server is treated as an HTTP proxy."""
    mapping = _proxies_for_requests({"server": "gw.example:3128"})
    assert mapping["http"] == "http://gw.example:3128"
@pytest.mark.unit
def test_proxies_credentials_are_url_encoded():
    """Reserved characters in credentials are percent-encoded."""
    proxy = {
        "server": "socks5://gw:1080",
        "username": "user@x",
        "password": "p:w/d",
    }
    mapping = _proxies_for_requests(proxy)
    # '@', ':' and '/' in creds must be percent-encoded so they don't break
    # the proxy URL parsing.
    assert "user%40x:p%3Aw%2Fd@gw:1080" in mapping["http"]
@pytest.mark.unit
def test_proxies_no_credentials_has_no_auth_prefix():
    """Without a username the URL carries no ``user:pass@`` part."""
    mapping = _proxies_for_requests({"server": "socks5://gw:1080"})
    assert mapping["http"] == "socks5h://gw:1080"
# ──────────────────────────────────────────────────────────────────────
# discover_egress_ip — mocked requests
# ──────────────────────────────────────────────────────────────────────
class _FakeResp:
    """Minimal response stand-in exposing ``text`` and ``raise_for_status``."""

    def __init__(self, text, status=200):
        self._status = status
        self.text = text

    def raise_for_status(self):
        """Raise ``RuntimeError`` when the stored status is 400 or above."""
        if self._status < 400:
            return
        raise RuntimeError(f"HTTP {self._status}")
@pytest.mark.unit
def test_discover_egress_ip_first_endpoint_wins(monkeypatch):
    """Discovery stops after the first endpoint that returns a valid IP."""
    requested = []

    def fake_get(url, **kw):
        requested.append(url)
        return _FakeResp("203.0.113.7\n")

    monkeypatch.setattr(_geo.requests, "get", fake_get)
    assert discover_egress_ip(SOCKS) == "203.0.113.7"
    assert len(requested) == 1  # stopped at the first success
@pytest.mark.unit
def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch):
    """A body that is not an IP makes discovery move on to the next endpoint."""
    responses = iter([_FakeResp("junk-not-an-ip"), _FakeResp("198.51.100.42")])

    def fake_get(url, **kw):
        return next(responses)

    monkeypatch.setattr(_geo.requests, "get", fake_get)
    found = discover_egress_ip(HTTP)
    assert found == "198.51.100.42"
@pytest.mark.unit
def test_discover_egress_ip_all_fail_raises(monkeypatch):
    """If every endpoint errors out, ``GeoTimezoneError`` is raised."""

    def fake_get(url, **kw):
        raise OSError("connection refused")

    monkeypatch.setattr(_geo.requests, "get", fake_get)
    with pytest.raises(GeoTimezoneError):
        discover_egress_ip(SOCKS)
# ──────────────────────────────────────────────────────────────────────
# ip_to_timezone — mocked mmdb reader
# ──────────────────────────────────────────────────────────────────────
class _FakeReader:
    """Context-manager stub for an mmdb reader that always yields one record."""

    def __init__(self, record):
        self._record = record

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Returning False lets exceptions raised in the ``with`` body propagate.
        return False

    def get(self, ip):
        """Return the canned record regardless of ``ip``."""
        return self._record
def _install_fake_maxminddb(monkeypatch, record):
    """Register a stub ``maxminddb`` module whose reader always yields ``record``."""

    def open_database(path):
        return _FakeReader(record)

    stub = types.ModuleType("maxminddb")
    stub.open_database = open_database
    monkeypatch.setitem(sys.modules, "maxminddb", stub)
@pytest.mark.unit
def test_ip_to_timezone_reads_location_time_zone(monkeypatch):
    """The zone comes from the record's ``location.time_zone`` field."""
    record = {"location": {"time_zone": "Europe/Rome"}}
    _install_fake_maxminddb(monkeypatch, record)
    assert ip_to_timezone("1.2.3.4", "x.mmdb") == "Europe/Rome"
@pytest.mark.unit
def test_ip_to_timezone_ip_absent_raises(monkeypatch):
    """An IP with no record in the database raises ``GeoTimezoneError``."""
    _install_fake_maxminddb(monkeypatch, None)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
@pytest.mark.unit
def test_ip_to_timezone_missing_zone_raises(monkeypatch):
    """A record without a ``time_zone`` entry raises ``GeoTimezoneError``."""
    record = {"location": {}}
    _install_fake_maxminddb(monkeypatch, record)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
@pytest.mark.unit
def test_ip_to_timezone_invalid_iana_raises(monkeypatch):
    """A zone name that is not valid IANA raises ``GeoTimezoneError``."""
    record = {"location": {"time_zone": "Not/AZone"}}
    _install_fake_maxminddb(monkeypatch, record)
    with pytest.raises(GeoTimezoneError):
        ip_to_timezone("1.2.3.4", "x.mmdb")
# ──────────────────────────────────────────────────────────────────────
# resolve_session_timezone — the precedence policy
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def stub_egress(monkeypatch):
    """Replace the egress pipeline with offline stubs and track whether it ran.

    Yields a dict whose ``"called"`` flag flips to ``True`` as soon as the
    stubbed ``discover_egress_ip`` is invoked.
    """
    import invisible_playwright.download as dl

    tracker = {"called": False}

    def fake_discover(proxy, **kw):
        tracker["called"] = True
        return "203.0.113.7"

    def fake_ip_to_timezone(ip, mmdb):
        return "America/New_York"

    def fake_ensure_geoip_mmdb(*a, **k):
        return "fake.mmdb"

    monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover)
    monkeypatch.setattr(_geo, "ip_to_timezone", fake_ip_to_timezone)
    # ensure_geoip_mmdb is imported from .download at call time
    monkeypatch.setattr(dl, "ensure_geoip_mmdb", fake_ensure_geoip_mmdb)
    return tracker
@pytest.mark.unit
@pytest.mark.parametrize("sentinel", ["host", "local", "HOST", "Local"])
def test_resolve_host_sentinel_forces_host_tz(sentinel, stub_egress):
    """``"host"`` / ``"local"`` yield the host TZ without any egress lookup."""
    # Even with a proxy set, "host"/"local" force the host TZ and never resolve.
    resolved = resolve_session_timezone(sentinel, SOCKS)
    assert resolved == ""
    assert stub_egress["called"] is False
@pytest.mark.unit
def test_resolve_explicit_iana_wins_over_proxy(stub_egress):
    """An explicit zone is returned as-is even when a proxy is configured."""
    resolved = resolve_session_timezone("Asia/Tokyo", SOCKS)
    assert resolved == "Asia/Tokyo"
    assert stub_egress["called"] is False  # no resolution when explicit
@pytest.mark.unit
def test_resolve_empty_no_proxy_is_host(stub_egress):
    """No timezone and no proxy keeps the host TZ."""
    resolved = resolve_session_timezone("", None)
    assert resolved == ""
    assert stub_egress["called"] is False
@pytest.mark.unit
def test_resolve_auto_no_proxy_is_host(stub_egress):
    """``"auto"`` without a proxy has nothing to resolve and keeps the host TZ."""
    resolved = resolve_session_timezone("auto", None)
    assert resolved == ""
    assert stub_egress["called"] is False
@pytest.mark.unit
def test_resolve_empty_with_proxy_defaults_to_auto(stub_egress):
    """An unset timezone behind a proxy is resolved from the egress IP."""
    # NEW default: a proxy with no timezone auto-resolves from the egress.
    resolved = resolve_session_timezone("", SOCKS)
    assert resolved == "America/New_York"
    assert stub_egress["called"] is True
@pytest.mark.unit
def test_resolve_auto_with_proxy_resolves(stub_egress):
    """``"auto"`` behind a proxy is resolved from the egress IP."""
    resolved = resolve_session_timezone("auto", HTTP)
    assert resolved == "America/New_York"
    assert stub_egress["called"] is True
@pytest.mark.unit
def test_resolve_direct_proxy_treated_as_no_proxy(stub_egress):
    """A ``direct://`` server behaves exactly like having no proxy."""
    direct = {"server": "direct://"}
    assert resolve_session_timezone("auto", direct) == ""
    assert stub_egress["called"] is False
@pytest.mark.unit
def test_resolve_fail_early_propagates(monkeypatch):
    """A discovery failure behind a proxy surfaces as ``GeoTimezoneError``."""

    # With a proxy set, a discovery failure must raise — never silent host TZ.
    def failing_discover(proxy, **kw):
        raise GeoTimezoneError("no egress")

    monkeypatch.setattr(_geo, "discover_egress_ip", failing_discover)
    with pytest.raises(GeoTimezoneError):
        resolve_session_timezone("auto", SOCKS)