invisible_playwright/tests/test_geo.py
feder-cr f2664f96e1 feat: timezone="auto" resolves from any egress + weekly geoip auto-update
Refine timezone="auto" so it ALWAYS resolves (drop the "host" sentinel):
- ""/"auto" resolve from the proxy egress when a proxy is set, else from the
  host own public IP (direct lookup); an explicit zone is the only opt-out.
- on failure: with a proxy raise; without a proxy fall back to the host TZ.

GeoIP DB now auto-updates against daijro/geoip-all-in-one weekly rebuild:
cache the latest, re-check after GEOIP_REFRESH_DAYS (7), prune old tags,
reuse a stale cache offline; GEOIP_MMDB_VERSION is only the cold fallback.

tests: test_geo.py (37) + test_geoip_update.py; full unit suite 429 green
plus 8 live combinations (proxy / no-proxy / explicit / failing / freshness).
2026-06-06 05:16:20 +02:00

288 lines
11 KiB
Python

"""Unit tests for `invisible_playwright._geo` (timezone="auto" resolution).
Covers: the precedence policy (resolve_session_timezone), proxy→requests
translation, egress IP discovery (mocked HTTP), and IP→IANA mapping (mocked
mmdb). No real network or mmdb is touched.
"""
import sys
import types
import pytest
from invisible_playwright import _geo
from invisible_playwright._geo import (
GeoTimezoneError,
_proxies_for_requests,
_proxy_is_set,
discover_egress_ip,
ip_to_timezone,
resolve_session_timezone,
)
SOCKS = {"server": "socks5://gw.example:1080", "username": "u", "password": "p"}
HTTP = {"server": "http://gw.example:8080", "username": "u", "password": "p"}
# ──────────────────────────────────────────────────────────────────────
# _proxy_is_set
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
@pytest.mark.parametrize(
"proxy,expected",
[
(None, False),
({}, False),
({"server": ""}, False),
({"server": " "}, False),
({"server": "direct://"}, False),
({"server": "DIRECT://"}, False),
({"server": "socks5://h:1"}, True),
({"server": "http://h:8080"}, True),
],
)
def test_proxy_is_set(proxy, expected):
assert _proxy_is_set(proxy) is expected
# ──────────────────────────────────────────────────────────────────────
# _proxies_for_requests — scheme + credential translation
# ──────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_proxies_socks5_uses_socks5h_remote_dns():
out = _proxies_for_requests(SOCKS)
assert out["http"] == "socks5h://u:p@gw.example:1080"
assert out["https"] == out["http"]
@pytest.mark.unit
def test_proxies_socks4_scheme():
out = _proxies_for_requests({"server": "socks4://gw:1080"})
assert out["http"] == "socks4://gw:1080"
@pytest.mark.unit
def test_proxies_http_and_https_schemes():
assert _proxies_for_requests(HTTP)["http"] == "http://u:p@gw.example:8080"
out = _proxies_for_requests({"server": "https://gw:8443"})
assert out["https"] == "https://gw:8443"
@pytest.mark.unit
def test_proxies_no_scheme_defaults_to_http():
out = _proxies_for_requests({"server": "gw.example:3128"})
assert out["http"] == "http://gw.example:3128"
@pytest.mark.unit
def test_proxies_credentials_are_url_encoded():
out = _proxies_for_requests(
{"server": "socks5://gw:1080", "username": "user@x", "password": "p:w/d"}
)
# '@', ':' and '/' in creds must be percent-encoded so they don't break
# the proxy URL parsing.
assert "user%40x:p%3Aw%2Fd@gw:1080" in out["http"]
@pytest.mark.unit
def test_proxies_no_credentials_has_no_auth_prefix():
out = _proxies_for_requests({"server": "socks5://gw:1080"})
assert out["http"] == "socks5h://gw:1080"
# ──────────────────────────────────────────────────────────────────────
# discover_egress_ip — mocked requests
# ──────────────────────────────────────────────────────────────────────
class _FakeResp:
def __init__(self, text, status=200):
self.text = text
self._status = status
def raise_for_status(self):
if self._status >= 400:
raise RuntimeError(f"HTTP {self._status}")
@pytest.mark.unit
def test_discover_egress_ip_first_endpoint_wins(monkeypatch):
calls = []
def fake_get(url, **kw):
calls.append(url)
return _FakeResp("203.0.113.7\n")
monkeypatch.setattr(_geo.requests, "get", fake_get)
assert discover_egress_ip(SOCKS) == "203.0.113.7"
assert len(calls) == 1 # stopped at the first success
@pytest.mark.unit
def test_discover_egress_ip_falls_through_to_next_on_error(monkeypatch):
seq = iter([_FakeResp("junk-not-an-ip"), _FakeResp("198.51.100.42")])
def fake_get(url, **kw):
return next(seq)
monkeypatch.setattr(_geo.requests, "get", fake_get)
assert discover_egress_ip(HTTP) == "198.51.100.42"
@pytest.mark.unit
def test_discover_egress_ip_all_fail_raises(monkeypatch):
def fake_get(url, **kw):
raise OSError("connection refused")
monkeypatch.setattr(_geo.requests, "get", fake_get)
with pytest.raises(GeoTimezoneError):
discover_egress_ip(SOCKS)
@pytest.mark.unit
def test_discover_egress_ip_no_proxy_is_direct(monkeypatch):
# proxy=None → direct request, requests.get must get proxies=None.
seen = {}
def fake_get(url, **kw):
seen["proxies"] = kw.get("proxies", "MISSING")
return _FakeResp("192.0.2.55")
monkeypatch.setattr(_geo.requests, "get", fake_get)
assert discover_egress_ip(None) == "192.0.2.55"
assert seen["proxies"] is None
# ──────────────────────────────────────────────────────────────────────
# ip_to_timezone — mocked mmdb reader
# ──────────────────────────────────────────────────────────────────────
class _FakeReader:
def __init__(self, record):
self._record = record
def __enter__(self):
return self
def __exit__(self, *a):
return False
def get(self, ip):
return self._record
def _install_fake_maxminddb(monkeypatch, record):
mod = types.ModuleType("maxminddb")
mod.open_database = lambda path: _FakeReader(record)
monkeypatch.setitem(sys.modules, "maxminddb", mod)
@pytest.mark.unit
def test_ip_to_timezone_reads_location_time_zone(monkeypatch):
_install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Europe/Rome"}})
assert ip_to_timezone("1.2.3.4", "x.mmdb") == "Europe/Rome"
@pytest.mark.unit
def test_ip_to_timezone_ip_absent_raises(monkeypatch):
_install_fake_maxminddb(monkeypatch, None)
with pytest.raises(GeoTimezoneError):
ip_to_timezone("1.2.3.4", "x.mmdb")
@pytest.mark.unit
def test_ip_to_timezone_missing_zone_raises(monkeypatch):
_install_fake_maxminddb(monkeypatch, {"location": {}})
with pytest.raises(GeoTimezoneError):
ip_to_timezone("1.2.3.4", "x.mmdb")
@pytest.mark.unit
def test_ip_to_timezone_invalid_iana_raises(monkeypatch):
_install_fake_maxminddb(monkeypatch, {"location": {"time_zone": "Not/AZone"}})
with pytest.raises(GeoTimezoneError):
ip_to_timezone("1.2.3.4", "x.mmdb")
# ──────────────────────────────────────────────────────────────────────
# resolve_session_timezone — the precedence policy
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def stub_egress(monkeypatch):
"""Make egress resolution deterministic + offline; record if it ran."""
state = {"called": False}
def fake_discover(proxy=None, **kw):
state["called"] = True
state["proxy_arg"] = proxy
return "203.0.113.7"
monkeypatch.setattr(_geo, "discover_egress_ip", fake_discover)
monkeypatch.setattr(_geo, "ip_to_timezone", lambda ip, mmdb: "America/New_York")
# ensure_geoip_mmdb is imported from .download at call time
import invisible_playwright.download as dl
monkeypatch.setattr(dl, "ensure_geoip_mmdb", lambda *a, **k: "fake.mmdb")
return state
@pytest.mark.unit
def test_resolve_explicit_iana_wins(stub_egress):
# An explicit zone wins and never triggers resolution (proxy or not).
assert resolve_session_timezone("Asia/Tokyo", SOCKS) == "Asia/Tokyo"
assert resolve_session_timezone("Asia/Tokyo", None) == "Asia/Tokyo"
assert stub_egress["called"] is False
@pytest.mark.unit
def test_resolve_empty_with_proxy_resolves_from_proxy(stub_egress):
assert resolve_session_timezone("", SOCKS) == "America/New_York"
assert stub_egress["called"] is True
assert stub_egress["proxy_arg"] == SOCKS # routed through the proxy
@pytest.mark.unit
def test_resolve_auto_with_proxy_resolves_from_proxy(stub_egress):
assert resolve_session_timezone("auto", HTTP) == "America/New_York"
assert stub_egress["proxy_arg"] == HTTP
@pytest.mark.unit
def test_resolve_empty_no_proxy_resolves_from_host(stub_egress):
# auto ALWAYS resolves — without a proxy, from the host's own public IP.
assert resolve_session_timezone("", None) == "America/New_York"
assert stub_egress["called"] is True
assert stub_egress["proxy_arg"] is None # direct request, no proxy
@pytest.mark.unit
def test_resolve_auto_no_proxy_resolves_from_host(stub_egress):
assert resolve_session_timezone("auto", None) == "America/New_York"
assert stub_egress["proxy_arg"] is None
@pytest.mark.unit
def test_resolve_direct_proxy_resolves_via_host(stub_egress):
# direct:// counts as "no proxy" → resolve from the host IP, don't skip.
assert resolve_session_timezone("auto", {"server": "direct://"}) == "America/New_York"
assert stub_egress["proxy_arg"] is None
@pytest.mark.unit
def test_resolve_no_proxy_failure_falls_back_to_host(monkeypatch):
# Without a proxy, a lookup failure must NOT break the launch → host TZ ("").
def boom(proxy=None, **kw):
raise GeoTimezoneError("offline")
monkeypatch.setattr(_geo, "discover_egress_ip", boom)
assert resolve_session_timezone("auto", None) == ""
assert resolve_session_timezone("", None) == ""
@pytest.mark.unit
def test_resolve_proxy_failure_raises(monkeypatch):
# With a proxy set, a failure must raise — never a silent host-TZ fallback.
def boom(proxy=None, **kw):
raise GeoTimezoneError("no egress")
monkeypatch.setattr(_geo, "discover_egress_ip", boom)
with pytest.raises(GeoTimezoneError):
resolve_session_timezone("auto", SOCKS)
with pytest.raises(GeoTimezoneError):
resolve_session_timezone("", SOCKS)