mirror of
https://github.com/feder-cr/invisible_playwright.git
synced 2026-06-07 08:35:12 +02:00
Address proxy timezone and platform docs
This commit is contained in:
parent
143aff4bd2
commit
8f0f86709d
11 changed files with 321 additions and 15 deletions
86
README.md
86
README.md
|
|
@ -59,7 +59,8 @@ pip install git+https://github.com/feder-cr/invisible_playwright.git
|
|||
python -m invisible_playwright fetch # one-time ~100 MB download, SHA256-verified
|
||||
```
|
||||
|
||||
Supported platforms: **Windows x86_64**, **Linux x86_64**.
|
||||
Supported platforms: **Windows x86_64**, **Linux x86_64**, and **Linux arm64**
|
||||
(arm64 only once the matching release asset is published).
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -141,6 +142,28 @@ with InvisiblePlaywright(proxy=proxy) as browser:
|
|||
|
||||
Schemes supported: `socks5`, `socks4`, `http`, `https`. Auth works on all of them (SOCKS5 via patched `nsProtocolProxyService.cpp`, HTTP/HTTPS via Playwright). DNS is routed through the proxy by default, no local leak.
|
||||
|
||||
If your proxy exits in a different timezone than the host machine, set
|
||||
`timezone="auto"` to resolve the timezone through the proxy before Firefox
|
||||
starts:
|
||||
|
||||
```python
|
||||
with InvisiblePlaywright(proxy=proxy, timezone="auto") as browser:
|
||||
...
|
||||
```
|
||||
|
||||
This makes the generated Firefox prefs, Playwright `timezone_id`, and process
|
||||
`TZ` agree with the proxy egress region. If the lookup fails, launch fails
|
||||
early so you can choose an explicit timezone instead:
|
||||
|
||||
```python
|
||||
with InvisiblePlaywright(proxy=proxy, timezone="Europe/Vienna") as browser:
|
||||
...
|
||||
```
|
||||
|
||||
Timezone lookup through a SOCKS proxy needs `requests` installed with SOCKS
|
||||
support. This project already depends on `requests[socks]`, so a default
|
||||
install includes it.
|
||||
|
||||
### Pinning specific fingerprint fields
|
||||
|
||||
By default everything comes from `seed`. To force specific values while the rest stays seed-derived:
|
||||
|
|
@ -193,6 +216,67 @@ async with async_playwright() as p:
|
|||
|
||||
For everyday Python usage the `InvisiblePlaywright` context manager is still the recommended entry point.
|
||||
|
||||
## TypeScript / Node usage
|
||||
|
||||
There is no native TypeScript SDK yet, but Node Playwright can launch the
|
||||
patched Firefox binary directly. Install and fetch the binary with Python:
|
||||
|
||||
```bash
|
||||
pip install git+https://github.com/feder-cr/invisible_playwright.git
|
||||
python -m invisible_playwright fetch
|
||||
python -m invisible_playwright path
|
||||
```
|
||||
|
||||
Then pass the printed path to Playwright in TypeScript:
|
||||
|
||||
```typescript
|
||||
import { firefox } from "playwright";
|
||||
|
||||
const browser = await firefox.launch({
|
||||
executablePath: "/absolute/path/from/invisible_playwright/path",
|
||||
headless: false,
|
||||
firefoxUserPrefs: {
|
||||
"zoom.stealth.timezone": "America/New_York",
|
||||
"invisible_playwright.humanize": true,
|
||||
},
|
||||
});
|
||||
|
||||
const page = await browser.newPage({
|
||||
timezoneId: "America/New_York",
|
||||
locale: "en-US",
|
||||
});
|
||||
```
|
||||
|
||||
For coherent sampled fingerprints from TypeScript, generate prefs through the
|
||||
Python helper and pass the resulting JSON into Node:
|
||||
|
||||
```bash
|
||||
python - <<'PY'
|
||||
import json
|
||||
from invisible_playwright import get_default_stealth_prefs
|
||||
print(json.dumps(get_default_stealth_prefs(seed=42, timezone="America/New_York")))
|
||||
PY
|
||||
```
|
||||
|
||||
## Linux arm64 builds
|
||||
|
||||
The wrapper resolves Linux arm64/aarch64 machines to this release asset name:
|
||||
|
||||
```text
|
||||
firefox-150.0.1-stealth-linux-arm64.tar.gz
|
||||
```
|
||||
|
||||
The archive should contain a runnable `firefox` entry at its root, matching the
|
||||
Linux x86_64 archive layout, and `checksums.txt` must include the arm64 asset
|
||||
SHA256. Once those release files exist, `python -m invisible_playwright fetch`
|
||||
will use them automatically on arm64 Linux.
|
||||
|
||||
At a high level, build the patched Firefox from
|
||||
[feder-cr/invisible_firefox](https://github.com/feder-cr/invisible_firefox) on
|
||||
an arm64 Linux host or runner, package the resulting Firefox directory as the
|
||||
asset above, add it to the same `BINARY_VERSION` release, and update
|
||||
`checksums.txt`.
|
||||
|
||||
## Related projects
|
||||
|
||||
invisible_playwright takes a different angle than the major Firefox-hardening projects but stands on their shoulders:
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ classifiers = [
|
|||
dependencies = [
|
||||
"playwright>=1.40",
|
||||
"platformdirs>=4",
|
||||
"requests>=2.31",
|
||||
"requests[socks]>=2.31",
|
||||
"tqdm>=4.66",
|
||||
"pywin32>=306; sys_platform == 'win32'",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -9,8 +9,11 @@ from __future__ import annotations
|
|||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
_SOCKS_SCHEMES = ("socks5://", "socks4://", "socks://")
|
||||
_DEFAULT_TIMEZONE_ENDPOINT = "https://ipapi.co/timezone/"
|
||||
|
||||
|
||||
def configure_proxy(
|
||||
|
|
@ -48,9 +51,85 @@ def configure_proxy(
|
|||
return None
|
||||
|
||||
|
||||
def resolve_proxy_timezone(
    proxy: Optional[Dict[str, str]],
    *,
    timeout: float = 6.0,
    endpoint: str = _DEFAULT_TIMEZONE_ENDPOINT,
) -> str:
    """Look up the IANA timezone seen from the proxy's egress IP.

    The launcher calls this for ``timezone="auto"`` before Firefox starts, so
    that Playwright's ``timezone_id`` and the process ``TZ`` env can match the
    proxy. The lookup is an HTTP GET to ``endpoint`` sent through the proxy
    URL derived from ``proxy``. SOCKS proxies need the package's
    ``requests[socks]`` dependency.

    Args:
        proxy: Proxy mapping with a ``server`` entry and optional
            ``username`` / ``password`` entries.
        timeout: Timeout in seconds handed to ``requests.get``.
        endpoint: URL whose response body is the timezone name.

    Returns:
        The response body with surrounding whitespace removed.

    Raises:
        ValueError: ``proxy`` is missing/empty, or its server is empty or
            ``direct://``.
        RuntimeError: the request failed, SOCKS support is unavailable, or
            the body does not look like an IANA timezone.
    """
    if not proxy:
        raise ValueError("timezone='auto' requires a proxy")

    target = (proxy.get("server") or "").strip()
    if target.lower() in ("", "direct://"):
        raise ValueError("timezone='auto' requires a non-direct proxy")

    # Built outside the ``try`` so only the network call is wrapped.
    routed = _requests_proxies(proxy)
    try:
        reply = requests.get(endpoint, proxies=routed, timeout=timeout)
        reply.raise_for_status()
    except requests.exceptions.InvalidSchema as err:
        # Must precede the generic handler: InvalidSchema is itself a
        # RequestException.
        message = (
            "timezone='auto' with SOCKS proxies requires the PySocks extra; "
            "install invisible-playwright with requests[socks] support"
        )
        raise RuntimeError(message) from err
    except requests.RequestException as err:
        raise RuntimeError(f"failed to resolve proxy timezone: {err}") from err

    zone = reply.text.strip()
    if _looks_like_iana_timezone(zone):
        return zone
    raise RuntimeError(f"proxy timezone endpoint returned invalid timezone: {zone!r}")
|
||||
|
||||
|
||||
def _is_socks_scheme(server: str) -> bool:
    """Report whether ``server`` starts with one of ``_SOCKS_SCHEMES``, ignoring case."""
    lowered = server.lower()
    return lowered.startswith(_SOCKS_SCHEMES)
|
||||
|
||||
|
||||
def _strip_scheme(server: str) -> str:
|
||||
return server.split("://", 1)[1] if "://" in server else server
|
||||
|
||||
|
||||
def _requests_proxies(proxy: Dict[str, str]) -> Dict[str, str]:
    """Build the ``proxies`` mapping for ``requests`` from a proxy dict.

    The same credential-bearing proxy URL is used for both the ``http`` and
    ``https`` keys. Missing or falsy ``server`` / ``username`` / ``password``
    entries are treated as empty strings.
    """
    user = proxy.get("username") or ""
    secret = proxy.get("password") or ""
    target = (proxy.get("server") or "").strip()
    url = _proxy_url_with_auth(target, user, secret)
    return dict(http=url, https=url)
|
||||
|
||||
|
||||
def _proxy_url_with_auth(server: str, username: str, password: str) -> str:
|
||||
if not username and not password:
|
||||
return server
|
||||
|
||||
from urllib.parse import quote, urlsplit, urlunsplit
|
||||
|
||||
parts = urlsplit(server)
|
||||
if not parts.scheme or not parts.netloc:
|
||||
return server
|
||||
|
||||
credentials = quote(username, safe="")
|
||||
if password:
|
||||
credentials += ":" + quote(password, safe="")
|
||||
return urlunsplit((
|
||||
parts.scheme,
|
||||
f"{credentials}@{parts.netloc}",
|
||||
parts.path,
|
||||
parts.query,
|
||||
parts.fragment,
|
||||
))
|
||||
|
||||
|
||||
def _looks_like_iana_timezone(value: str) -> bool:
|
||||
if not value or "/" not in value:
|
||||
return False
|
||||
return all(part and ".." not in part for part in value.split("/"))
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from playwright.async_api import Browser, BrowserContext, Playwright, async_play
|
|||
from ._fpforge import Profile, generate_profile
|
||||
from ._headless import make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from ._proxy import resolve_proxy_timezone
|
||||
from .download import ensure_binary
|
||||
from .launcher import _CHROME_H, _CHROME_W, _TASKBAR_H, _tz_env
|
||||
from .prefs import translate_profile_to_prefs
|
||||
|
|
@ -61,7 +62,7 @@ class InvisiblePlaywright:
|
|||
self._extra_args = list(extra_args or [])
|
||||
self._humanize = humanize
|
||||
self._locale = locale
|
||||
self._timezone = timezone
|
||||
self._timezone = self._resolve_timezone(timezone, proxy)
|
||||
self._extra_prefs = extra_prefs
|
||||
self._binary_path = binary_path
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
|
|
@ -73,6 +74,12 @@ class InvisiblePlaywright:
|
|||
self._persistent_context: Optional[BrowserContext] = None
|
||||
self._virtual_display: Any = None
|
||||
|
||||
@staticmethod
def _resolve_timezone(timezone: str, proxy: Optional[Dict[str, str]]) -> str:
    """Return ``timezone`` as given, or the proxy-derived zone for ``"auto"``.

    Only the exact string ``"auto"`` triggers ``resolve_proxy_timezone``;
    every other value, including the empty string, is passed through.
    """
    if timezone != "auto":
        return timezone
    return resolve_proxy_timezone(proxy)
|
||||
|
||||
async def __aenter__(self) -> Union[Browser, BrowserContext]:
|
||||
import sys as _sys
|
||||
executable = self._binary_path or ensure_binary()
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@ def ARCHIVE_NAME(platform_key: str, machine: str) -> str:
|
|||
m = machine.lower()
|
||||
if m in {"amd64", "x86_64"}:
|
||||
arch = "x86_64"
|
||||
elif pk == "linux" and m in {"arm64", "aarch64"}:
|
||||
arch = "arm64"
|
||||
else:
|
||||
raise NotImplementedError(f"unsupported arch: {machine}")
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwr
|
|||
from ._fpforge import Profile, generate_profile
|
||||
from ._headless import make_virtual_display
|
||||
from ._proxy import configure_proxy as _configure_proxy_shared
|
||||
from ._proxy import resolve_proxy_timezone
|
||||
from .download import ensure_binary
|
||||
from .prefs import translate_profile_to_prefs
|
||||
|
||||
|
|
@ -135,8 +136,9 @@ class InvisiblePlaywright:
|
|||
a float caps the motion in seconds.
|
||||
locale: BCP-47 tag (e.g. ``"en-US"``). Drives the
|
||||
``Accept-Language`` header and ``navigator.language``.
|
||||
timezone: IANA timezone (e.g. ``"America/New_York"``). Empty
|
||||
means use the host TZ.
|
||||
timezone: IANA timezone (e.g. ``"America/New_York"``), or
|
||||
``"auto"`` to resolve the timezone from the configured proxy
|
||||
before launch. Empty means use the host TZ.
|
||||
extra_prefs: Optional dict of Firefox prefs overlayed on top
|
||||
of the generated profile — useful for niche tweaks
|
||||
without monkey-patching the package.
|
||||
|
|
@ -163,7 +165,7 @@ class InvisiblePlaywright:
|
|||
self._extra_args = list(extra_args or [])
|
||||
self._humanize = humanize
|
||||
self._locale = locale
|
||||
self._timezone = timezone
|
||||
self._timezone = self._resolve_timezone(timezone, proxy)
|
||||
self._extra_prefs = extra_prefs
|
||||
self._binary_path = binary_path
|
||||
self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None
|
||||
|
|
@ -177,6 +179,12 @@ class InvisiblePlaywright:
|
|||
self._persistent_context: Optional[BrowserContext] = None
|
||||
self._virtual_display: Any = None
|
||||
|
||||
@staticmethod
def _resolve_timezone(timezone: str, proxy: Optional[Dict[str, str]]) -> str:
    """Return ``timezone`` as given, or the proxy-derived zone for ``"auto"``.

    Only the exact string ``"auto"`` triggers ``resolve_proxy_timezone``;
    every other value, including the empty string, is passed through.
    """
    if timezone != "auto":
        return timezone
    return resolve_proxy_timezone(proxy)
|
||||
|
||||
def __enter__(self) -> Union[Browser, BrowserContext]:
|
||||
executable = self._binary_path or ensure_binary()
|
||||
prefs = self._build_prefs()
|
||||
|
|
@ -369,4 +377,3 @@ class InvisiblePlaywright:
|
|||
if self._humanize is True:
|
||||
return 1.5
|
||||
return float(self._humanize)
|
||||
|
||||
|
|
|
|||
|
|
@ -81,3 +81,21 @@ def test_async_default_context_kwargs_match_sync():
|
|||
a = AsyncIP(seed=42, timezone="America/New_York", locale="de-DE")
|
||||
s = SyncIP(seed=42, timezone="America/New_York", locale="de-DE")
|
||||
assert a._default_context_kwargs() == s._default_context_kwargs()
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_async_timezone_auto_resolves_from_proxy(monkeypatch):
    """``timezone="auto"`` on the async API is replaced by the resolver's result."""
    seen_proxies = []

    def stub_resolver(proxy_arg):
        seen_proxies.append(proxy_arg)
        return "Europe/Vienna"

    monkeypatch.setattr("invisible_playwright.async_api.resolve_proxy_timezone", stub_resolver)

    socks_proxy = {"server": "socks5://host:1080"}
    launcher = AsyncIP(seed=42, proxy=socks_proxy, timezone="auto")

    assert launcher._timezone == "Europe/Vienna"
    context_kwargs = launcher._default_context_kwargs()
    assert context_kwargs["timezone_id"] == "Europe/Vienna"
    # The resolver is consulted exactly once, with the caller's proxy dict.
    assert seen_proxies == [socks_proxy]
|
||||
|
|
|
|||
|
|
@ -55,6 +55,8 @@ def test_binary_basename_format():
|
|||
("win32", "x86_64", "win-x86_64.zip"), # mingw-style
|
||||
("linux", "x86_64", "linux-x86_64.tar.gz"), # standard Linux
|
||||
("linux", "AMD64", "linux-x86_64.tar.gz"), # odd but plausible
|
||||
("linux", "arm64", "linux-arm64.tar.gz"), # Linux ARM64
|
||||
("linux", "aarch64", "linux-arm64.tar.gz"), # Linux ARM64 alias
|
||||
("Linux", "x86_64", "linux-x86_64.tar.gz"), # case-insensitive platform
|
||||
("WIN32", "AMD64", "win-x86_64.zip"), # ALL CAPS platform
|
||||
])
|
||||
|
|
@ -77,13 +79,10 @@ def test_archive_name_rejects_unsupported_arches(machine):
|
|||
|
||||
@pytest.mark.unit
|
||||
@pytest.mark.parametrize("machine", ["arm64", "aarch64"])
|
||||
def test_archive_name_arm64_not_yet_supported(machine):
|
||||
"""ARM64 is a frequent request (issue #6). Until binaries exist for it,
|
||||
ARCHIVE_NAME should hard-fail rather than silently degrade. If this test
|
||||
starts failing because someone shipped ARM64 builds, replace it with the
|
||||
positive case."""
|
||||
with pytest.raises(NotImplementedError):
|
||||
ARCHIVE_NAME("linux", machine)
|
||||
def test_archive_name_linux_arm64_supported(machine):
|
||||
"""ARM64 Linux builds use a stable archive name when release assets exist."""
|
||||
name = ARCHIVE_NAME("linux", machine)
|
||||
assert name.endswith("linux-arm64.tar.gz")
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
|
|
@ -110,6 +109,7 @@ def test_binary_entry_rel_covers_every_supported_platform():
|
|||
f"ARCHIVE_NAME accepts {plat!r} but BINARY_ENTRY_REL has no entry "
|
||||
f"— ensure_binary() will fail late after a 110 MB download."
|
||||
)
|
||||
ARCHIVE_NAME("linux", "arm64") # same Linux archive layout and entry path
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
|
|
|
|||
|
|
@ -321,6 +321,8 @@ def test_ensure_binary_accepts_binary_mode_checksums(tmp_path, monkeypatch):
|
|||
|
||||
# Force the platform branch the test mocks:
|
||||
monkeypatch.setattr("sys.platform", "win32")
|
||||
import platform
|
||||
monkeypatch.setattr(platform, "machine", lambda: "AMD64")
|
||||
out = ensure_binary()
|
||||
# No RuntimeError means the parser accepted the `*`-prefixed key.
|
||||
assert out.exists()
|
||||
|
|
|
|||
|
|
@ -150,6 +150,24 @@ def test_default_context_includes_timezone_when_set():
|
|||
assert ip._default_context_kwargs()["timezone_id"] == "America/New_York"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_timezone_auto_resolves_from_proxy(monkeypatch):
    """``timezone="auto"`` on the sync launcher is replaced by the resolver's result."""
    seen_proxies = []

    def stub_resolver(proxy_arg):
        seen_proxies.append(proxy_arg)
        return "Europe/Vienna"

    monkeypatch.setattr("invisible_playwright.launcher.resolve_proxy_timezone", stub_resolver)

    socks_proxy = {"server": "socks5://host:1080"}
    launcher = InvisiblePlaywright(seed=42, proxy=socks_proxy, timezone="auto")

    assert launcher._timezone == "Europe/Vienna"
    context_kwargs = launcher._default_context_kwargs()
    assert context_kwargs["timezone_id"] == "Europe/Vienna"
    # The resolver is consulted exactly once, with the caller's proxy dict.
    assert seen_proxies == [socks_proxy]
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_default_context_omits_timezone_when_empty():
|
||||
"""Default ``timezone=""`` means "let the host TZ leak through" —
|
||||
|
|
|
|||
|
|
@ -4,8 +4,13 @@ Decision-table coverage of every input partition: None/empty/direct,
|
|||
SOCKS4/5/default, HTTP/HTTPS, case variants, malformed, mutation contract.
|
||||
"""
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from invisible_playwright._proxy import configure_proxy
|
||||
from invisible_playwright._proxy import (
|
||||
configure_proxy,
|
||||
resolve_proxy_timezone,
|
||||
_proxy_url_with_auth,
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -258,6 +263,90 @@ def test_socks_port_coerced_to_int():
|
|||
assert isinstance(prefs["network.proxy.socks_port"], int)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Proxy timezone auto-resolution
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, text="Europe/Vienna") -> None:
|
||||
self.text = text
|
||||
|
||||
def raise_for_status(self) -> None:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxy_url_with_auth_percent_encodes_credentials():
    """``@``, ``/`` and ``:`` inside credentials are percent-encoded in the URL."""
    url = _proxy_url_with_auth("socks5://host:1080", "user@example.com", "p/a:ss")
    expected = "socks5://user%40example.com:p%2Fa%3Ass@host:1080"
    assert url == expected
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_proxy_url_without_auth_returns_server_unchanged():
    """With empty username and password the server URL comes back untouched."""
    server = "socks5://host:1080"
    result = _proxy_url_with_auth(server, "", "")
    assert result == "socks5://host:1080"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_proxy_timezone_routes_request_through_proxy(monkeypatch):
    """The lookup sends endpoint, proxies and timeout to ``requests.get`` as given."""
    recorded = []

    def stub_get(target, *, proxies, timeout):
        recorded.append((target, proxies, timeout))
        # Trailing newline checks that the resolver strips the body.
        return _FakeResponse("Europe/Vienna\n")

    monkeypatch.setattr("invisible_playwright._proxy.requests.get", stub_get)

    authed_proxy = {"server": "socks5://host:1080", "username": "u", "password": "p"}
    resolved = resolve_proxy_timezone(
        authed_proxy,
        timeout=1.5,
        endpoint="https://example.test/timezone",
    )

    proxy_url = "socks5://u:p@host:1080"
    expected_call = (
        "https://example.test/timezone",
        {"http": proxy_url, "https": proxy_url},
        1.5,
    )
    assert resolved == "Europe/Vienna"
    assert recorded == [expected_call]
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_proxy_timezone_rejects_missing_proxy():
    """Passing ``None`` as the proxy raises ``ValueError``."""
    no_proxy = None
    with pytest.raises(ValueError, match="requires a proxy"):
        resolve_proxy_timezone(no_proxy)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_proxy_timezone_rejects_direct_proxy():
    """A ``direct://`` server is not a usable proxy for timezone lookup."""
    direct = {"server": "direct://"}
    with pytest.raises(ValueError, match="non-direct proxy"):
        resolve_proxy_timezone(direct)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_proxy_timezone_rejects_invalid_timezone(monkeypatch):
    """A response body that is not shaped like a zone name raises ``RuntimeError``."""

    def stub_get(*args, **kwargs):
        return _FakeResponse("not-a-zone")

    monkeypatch.setattr("invisible_playwright._proxy.requests.get", stub_get)

    http_proxy = {"server": "http://host:8080"}
    with pytest.raises(RuntimeError, match="invalid timezone"):
        resolve_proxy_timezone(http_proxy)
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_proxy_timezone_wraps_request_errors(monkeypatch):
    """A ``requests`` failure is re-raised as ``RuntimeError``."""

    def failing_get(*args, **kwargs):
        raise requests.RequestException("network down")

    monkeypatch.setattr("invisible_playwright._proxy.requests.get", failing_get)

    http_proxy = {"server": "http://host:8080"}
    with pytest.raises(RuntimeError, match="failed to resolve proxy timezone"):
        resolve_proxy_timezone(http_proxy)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_socks_non_numeric_port_raises_value_error():
|
||||
"""Non-numeric port is a programmer error — int() raises."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue