From 22b117151872d5ad049c41d77f891631008658d4 Mon Sep 17 00:00:00 2001 From: feder-cr <85809106+feder-cr@users.noreply.github.com> Date: Thu, 21 May 2026 12:19:38 -0700 Subject: [PATCH] feat: persistent profile dir + C7 closure (firefox-5 / 0.1.6) - Add profile_dir= kwarg to InvisiblePlaywright (sync + async). Maps to firefox.launch_persistent_context(); returns a BrowserContext. Cookies / localStorage / extensions / cache / prefs all persisted. - Drop the firefox-4 era workaround that filtered locale + timezone_id out of the persistent kwargs. firefox-5 ships the C++ docShell.overrideTimezone IDL method (50 LOC patch in docshell/base/nsIDocShell.idl + nsDocShell.cpp, see patch.md section 19 in feder-cr/invisible-firefox), so per-realm overrides land without crashing the launch handshake. - Bump BINARY_VERSION firefox-4 -> firefox-5. - Sentinel unit tests added: persistent kwargs MUST include locale + timezone_id (defends against re-introducing the workaround) and must NOT include timezone_id when timezone="" is the "host TZ" sentinel. Validation: smoke test against the local firefox-5 build, persistent context UP in 21s (was 180s timeout), Intl.timeZone == Europe/London, hardwareConcurrency / screen / DPR / locale all reflect the PIN. --- CHANGELOG.md | 11 ++ pyproject.toml | 2 +- src/invisible_playwright/async_api.py | 32 +++- src/invisible_playwright/constants.py | 2 +- src/invisible_playwright/launcher.py | 58 ++++++- tests/test_launcher_config.py | 214 ++++++++++++++++++++++++++ 6 files changed, 313 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9d5aec..731f740 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +## [0.1.6] - 2026-05-21 + +### Added +- `profile_dir=` kwarg on `InvisiblePlaywright` (sync + async). 
When set, the session uses `firefox.launch_persistent_context()` so cookies, localStorage, sessionStorage, extensions, cache and prefs are kept on disk between runs. `__enter__` returns a `BrowserContext` directly: `with InvisiblePlaywright(profile_dir=p) as ctx: ctx.new_page()`. Pair with a stable `seed=` to also pin the fingerprint identity across runs. First run creates the dir; subsequent runs reuse it. + +### Fixed +- `launch_persistent_context(timezone_id="…")` no longer times out at 180s. Root cause: `juggler/content/main.js` calls `docShell.overrideTimezone(...)` on every navigation; the patched Firefox up to firefox-4 didn't expose that IDL method on `nsIDocShell`, so the call threw `TypeError: docShell.overrideTimezone is not a function`. On the non-persistent path the error fired *after* launch and was harmless; on the persistent path it blocked the launch handshake. `firefox-5` ships the C++ method (see `patch.md` section 19); this release removes the firefox-4-era Python workaround that was filtering `locale`/`timezone_id` out of the persistent context kwargs. + +### Changed +- `BINARY_VERSION` bumped from `firefox-4` to `firefox-5`. The source delta in this package is JS/Python only; the new Firefox build itself adds 50 lines of C++ in `docshell/base/nsIDocShell.idl` + `nsDocShell.cpp`. + ## [0.1.5] - 2026-05-20 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index 02f4cfc..3fe45c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "invisible-playwright" -version = "0.1.5" +version = "0.1.6" description = "Playwright wrapper for a patched Firefox with deterministic stealth profile."
readme = "README.md" requires-python = ">=3.11" diff --git a/src/invisible_playwright/async_api.py b/src/invisible_playwright/async_api.py index 2933c1e..2b2eeca 100644 --- a/src/invisible_playwright/async_api.py +++ b/src/invisible_playwright/async_api.py @@ -3,9 +3,10 @@ from __future__ import annotations import asyncio import secrets +from pathlib import Path from typing import Any, Dict, Optional, Union -from playwright.async_api import Browser, Playwright, async_playwright +from playwright.async_api import Browser, BrowserContext, Playwright, async_playwright from ._fpforge import Profile, generate_profile from ._headless import make_virtual_display @@ -49,6 +50,7 @@ class InvisiblePlaywright: timezone: str = "", extra_prefs: Optional[Dict[str, Any]] = None, binary_path: Optional[str] = None, + profile_dir: Optional[Union[str, Path]] = None, ) -> None: # See sync launcher: `zoom.stealth.fpp.hw_seed` is int32_t — clamp. self.seed: int = int(seed) if seed is not None else secrets.randbits(31) @@ -61,12 +63,14 @@ class InvisiblePlaywright: self._timezone = timezone self._extra_prefs = extra_prefs self._binary_path = binary_path + self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None self._profile: Profile = generate_profile(self.seed, pin=self._pin) self._pw: Optional[Playwright] = None self._browser: Optional[Browser] = None + self._persistent_context: Optional[BrowserContext] = None self._virtual_display: Any = None - async def __aenter__(self) -> Browser: + async def __aenter__(self) -> Union[Browser, BrowserContext]: import sys as _sys executable = self._binary_path or ensure_binary() prefs = translate_profile_to_prefs( @@ -85,6 +89,24 @@ class InvisiblePlaywright: env = self._build_env() try: self._pw = await async_playwright().start() + if self._profile_dir is not None: + # See sync launcher for the persistent-context rationale. 
+ self._profile_dir.mkdir(parents=True, exist_ok=True) + # firefox-5 ships the C++ overrideTimezone IDL method (C7 + # closure), so locale + timezone_id now propagate cleanly + # to the persistent context without hanging the launch. + self._persistent_context = await self._pw.firefox.launch_persistent_context( + user_data_dir=str(self._profile_dir), + executable_path=str(executable), + headless=pw_headless, + firefox_user_prefs=prefs, + proxy=playwright_proxy, + args=self._extra_args, + env=env, + **self._default_context_kwargs(), + ) + _patch_new_page_sleep(self._persistent_context) + return self._persistent_context self._browser = await self._pw.firefox.launch( executable_path=str(executable), headless=pw_headless, @@ -134,6 +156,12 @@ class InvisiblePlaywright: await self._teardown() async def _teardown(self) -> None: + if self._persistent_context is not None: + try: + await self._persistent_context.close() + except Exception: + pass + self._persistent_context = None if self._browser is not None: try: await self._browser.close() diff --git a/src/invisible_playwright/constants.py b/src/invisible_playwright/constants.py index 43269eb..b13a458 100644 --- a/src/invisible_playwright/constants.py +++ b/src/invisible_playwright/constants.py @@ -7,7 +7,7 @@ bugfixes don't force a multi-hour Firefox rebuild. from __future__ import annotations # Bump this when a new patched Firefox build is released on GitHub. -BINARY_VERSION: str = "firefox-4" +BINARY_VERSION: str = "firefox-5" # Underlying Firefox version (for display only; does not drive downloads).
FIREFOX_UPSTREAM_VERSION: str = "150.0.1" diff --git a/src/invisible_playwright/launcher.py b/src/invisible_playwright/launcher.py index b79e4ff..07c7967 100644 --- a/src/invisible_playwright/launcher.py +++ b/src/invisible_playwright/launcher.py @@ -2,9 +2,10 @@ from __future__ import annotations import secrets +from pathlib import Path from typing import Any, Dict, Optional, Union -from playwright.sync_api import Browser, Playwright, sync_playwright +from playwright.sync_api import Browser, BrowserContext, Playwright, sync_playwright from ._fpforge import Profile, generate_profile from ._headless import make_virtual_display @@ -111,6 +112,7 @@ class InvisiblePlaywright: timezone: str = "", extra_prefs: Optional[Dict[str, Any]] = None, binary_path: Optional[str] = None, + profile_dir: Optional[Union[str, Path]] = None, ) -> None: """ Args: @@ -137,6 +139,15 @@ class InvisiblePlaywright: extra_prefs: Optional dict of Firefox prefs overlayed on top of the generated profile — useful for niche tweaks without monkey-patching the package. + profile_dir: Path to a persistent Firefox profile directory. + When set, the session uses ``launch_persistent_context()`` + so cookies, localStorage, sessionStorage, extensions, cache + and prefs are kept on disk between runs. ``__enter__`` + returns a ``BrowserContext`` (not a ``Browser``) — use it + directly: ``with InvisiblePlaywright(profile_dir=p) as ctx: + page = ctx.new_page()``. First run creates the dir; + subsequent runs reuse it. Pair with a stable ``seed=`` to + also pin the fingerprint identity across runs. 
""" # Constrain to int31 — Firefox's `zoom.stealth.fpp.hw_seed` and # related stealth prefs are declared as ``int32_t`` in @@ -154,12 +165,14 @@ class InvisiblePlaywright: self._timezone = timezone self._extra_prefs = extra_prefs self._binary_path = binary_path + self._profile_dir: Optional[Path] = Path(profile_dir) if profile_dir else None self._profile: Profile = generate_profile(self.seed, pin=self._pin) self._pw: Optional[Playwright] = None self._browser: Optional[Browser] = None + self._persistent_context: Optional[BrowserContext] = None self._virtual_display: Any = None - def __enter__(self) -> Browser: + def __enter__(self) -> Union[Browser, BrowserContext]: executable = self._binary_path or ensure_binary() prefs = self._build_prefs() playwright_proxy = _configure_proxy_shared(self._proxy, prefs) @@ -168,6 +181,25 @@ class InvisiblePlaywright: try: self._pw = sync_playwright().start() + if self._profile_dir is not None: + # Persistent context — cookies / localStorage / extensions / + # prefs all live on disk between runs. Stealth prefs are + # re-injected via firefox_user_prefs on every launch (Playwright + # writes them to user.js, which overrides anything in + # prefs.js inside the persistent dir). 
+ self._profile_dir.mkdir(parents=True, exist_ok=True) + self._persistent_context = self._pw.firefox.launch_persistent_context( + user_data_dir=str(self._profile_dir), + executable_path=str(executable), + headless=pw_headless, + firefox_user_prefs=prefs, + proxy=playwright_proxy, + args=self._extra_args, + env=env, + **self._persistent_context_kwargs(), + ) + _patch_sync_new_page_sleep(self._persistent_context) + return self._persistent_context self._browser = self._pw.firefox.launch( executable_path=str(executable), headless=pw_headless, @@ -185,6 +217,22 @@ class InvisiblePlaywright: self._patch_new_context_defaults(self._browser) return self._browser + def _persistent_context_kwargs(self) -> Dict[str, Any]: + """Context-level kwargs accepted by launch_persistent_context. + + Identical to ``_default_context_kwargs``: viewport / screen / DPR / + color-scheme / locale / timezone_id. Up to firefox-4 we had to drop + locale and timezone_id because Playwright's per-realm overrides + called IDL methods (``docShell.languageOverride``, + ``docShell.overrideTimezone``) that weren't all exposed by our patched + build, causing launch_persistent_context to hang for 180s. From + firefox-5 (C7 closure), the C++ ``overrideTimezone`` method is + present and ``languageOverride`` was already there, so the + per-realm overrides land and the persistent context starts in + ~20s like the non-persistent path. + """ + return self._default_context_kwargs() + def _patch_new_context_defaults(self, browser: Browser) -> None: """Wrap ``browser.new_context`` so its defaults derive from the profile (viewport, screen, DPR, color-scheme).
Users get a @@ -226,6 +274,12 @@ class InvisiblePlaywright: self._teardown() def _teardown(self) -> None: + if self._persistent_context is not None: + try: + self._persistent_context.close() + except Exception: + pass + self._persistent_context = None if self._browser is not None: try: self._browser.close() diff --git a/tests/test_launcher_config.py b/tests/test_launcher_config.py index daf88c4..85047e5 100644 --- a/tests/test_launcher_config.py +++ b/tests/test_launcher_config.py @@ -55,3 +55,217 @@ def test_invisible_playwright_constructs_without_launching(): assert obj is not None obj2 = InvisiblePlaywright(seed=42, headless=True) assert obj2 is not None + + +# ─── profile_dir kwarg — persistent context support ─────────────────────── # + +import pytest +from pathlib import Path + + +@pytest.mark.unit +def test_profile_dir_none_by_default(): + """No persistent profile unless explicitly opted in. Prevents accidental + state-leak between scripts that share the same seed.""" + obj = InvisiblePlaywright(seed=42) + assert obj._profile_dir is None + assert obj._persistent_context is None + + +@pytest.mark.unit +def test_profile_dir_string_is_coerced_to_path(tmp_path): + """Accept str or Path. Always store as Path internally.""" + obj = InvisiblePlaywright(seed=42, profile_dir=str(tmp_path)) + assert isinstance(obj._profile_dir, Path) + assert obj._profile_dir == tmp_path + + +@pytest.mark.unit +def test_profile_dir_path_is_stored_as_is(tmp_path): + obj = InvisiblePlaywright(seed=42, profile_dir=tmp_path) + assert obj._profile_dir == tmp_path + + +@pytest.mark.unit +def test_profile_dir_does_not_create_dir_until_enter(tmp_path): + """Construction must not touch the filesystem. 
Directory creation only + happens when the user actually enters the context manager — otherwise + a typo at instantiation would silently spawn dirs.""" + target = tmp_path / "nonexistent" + assert not target.exists() + InvisiblePlaywright(seed=42, profile_dir=target) + assert not target.exists() + + +@pytest.mark.unit +def test_persistent_context_kwargs_match_default_exactly(): + """Persistent kwargs must be IDENTICAL to non-persistent default + kwargs. From firefox-5 (C7 closure) the docShell.overrideTimezone + method is present in the patched binary, so the per-realm overrides + Playwright applies for `locale=`/`timezone_id=` land successfully and + no longer hang the persistent context launch handshake. + + Before firefox-5 we had to filter these out (180s timeout otherwise). + A future refactor that re-introduces that filter would silently lose + timezone/locale isolation in persistent sessions — this test is the + sentinel that catches the regression at the unit level.""" + obj = InvisiblePlaywright(seed=42, locale="en-GB", timezone="Europe/London", + profile_dir="/tmp/x") + persistent = obj._persistent_context_kwargs() + default = obj._default_context_kwargs() + assert persistent == default, ( + "persistent_context kwargs must match default_context kwargs since " + f"firefox-5.\n persistent: {persistent!r}\n default: {default!r}" + ) + + +@pytest.mark.unit +def test_persistent_context_kwargs_INCLUDES_locale_and_timezone(): + """Sentinel for the C7 closure: firefox-5 ships the C++ overrideTimezone + IDL method, so locale + timezone_id MUST be passed through to + launch_persistent_context. If they're not, the wrapper is silently + dropping per-context isolation — two sessions with different + `timezone=` would end up sharing whatever TZ the env var set. 
+ + Regression-defense: do NOT re-add the firefox-4-era filter.""" + obj = InvisiblePlaywright(seed=42, locale="en-GB", timezone="Europe/London", + profile_dir="/tmp/x") + kw = obj._persistent_context_kwargs() + assert kw.get("locale") == "en-GB", ( + f"locale must be in persistent kwargs (firefox-5+ supports it via " + f"docShell.languageOverride). Got: {kw.get('locale')!r}" + ) + assert kw.get("timezone_id") == "Europe/London", ( + f"timezone_id must be in persistent kwargs (firefox-5+ supports it " + f"via docShell.overrideTimezone IDL method, patch.md section 19). " + f"Got: {kw.get('timezone_id')!r}" + ) + + +@pytest.mark.unit +def test_persistent_context_kwargs_omits_timezone_when_empty_string(): + """Empty timezone='' is the 'use host TZ' sentinel — must NOT pass + timezone_id to Playwright in that case (would pin to literal '' and + break Intl).""" + obj = InvisiblePlaywright(seed=42, timezone="", profile_dir="/tmp/x") + kw = obj._persistent_context_kwargs() + assert "timezone_id" not in kw + + +# ─── Mocked __enter__ flow — confirms the right Playwright call is made ── # + + +@pytest.mark.unit +def test_enter_with_profile_dir_calls_launch_persistent_context(tmp_path, monkeypatch): + """When profile_dir is set, __enter__ must call + `firefox.launch_persistent_context(user_data_dir=...)` and NOT + `firefox.launch(...)`. 
This is the structural test that the persistent + branch is wired correctly — without it, profile_dir would be silently + accepted but ignored.""" + from unittest.mock import MagicMock + # Mock ensure_binary so we don't hit the network + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + + # Mock sync_playwright().start() → fake playwright with our recording firefox + fake_ctx = MagicMock(name="persistent_context") + fake_firefox = MagicMock() + fake_firefox.launch_persistent_context.return_value = fake_ctx + fake_playwright = MagicMock() + fake_playwright.firefox = fake_firefox + fake_pw = MagicMock() + fake_pw.start.return_value = fake_playwright + + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + profile = tmp_path / "myprofile" + obj = InvisiblePlaywright(seed=42, profile_dir=profile) + returned = obj.__enter__() + + # The persistent branch was taken + fake_firefox.launch_persistent_context.assert_called_once() + fake_firefox.launch.assert_not_called() + + # The user_data_dir was passed verbatim + call_kwargs = fake_firefox.launch_persistent_context.call_args.kwargs + assert call_kwargs["user_data_dir"] == str(profile) + + # The directory was created on disk (Playwright fails otherwise) + assert profile.exists() and profile.is_dir() + + # __enter__ returned the BrowserContext, not a Browser + assert returned is fake_ctx + + +@pytest.mark.unit +def test_enter_without_profile_dir_calls_launch_not_persistent(tmp_path, monkeypatch): + """Default path: profile_dir=None → firefox.launch, not + launch_persistent_context. 
Sentinel that the non-persistent flow + isn't accidentally rerouted.""" + from unittest.mock import MagicMock + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + + fake_browser = MagicMock(name="browser") + fake_browser.new_context = MagicMock() + fake_firefox = MagicMock() + fake_firefox.launch.return_value = fake_browser + fake_playwright = MagicMock() + fake_playwright.firefox = fake_firefox + fake_pw = MagicMock() + fake_pw.start.return_value = fake_playwright + + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + obj = InvisiblePlaywright(seed=42) + returned = obj.__enter__() + + fake_firefox.launch.assert_called_once() + fake_firefox.launch_persistent_context.assert_not_called() + assert returned is fake_browser + + +@pytest.mark.unit +def test_persistent_context_user_data_dir_is_created_if_missing(tmp_path, monkeypatch): + """First-run scenario: the directory the user names doesn't exist yet. + __enter__ must mkdir -p it (Playwright won't, and would crash with + 'user_data_dir does not exist').""" + from unittest.mock import MagicMock + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + fake_pw = MagicMock() + fake_pw.start.return_value = MagicMock() + fake_pw.start.return_value.firefox.launch_persistent_context = MagicMock( + return_value=MagicMock() + ) + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + nested = tmp_path / "a" / "b" / "c" / "profile" + assert not nested.parent.exists() # parent doesn't exist either + obj = InvisiblePlaywright(seed=42, profile_dir=nested) + obj.__enter__() + assert nested.is_dir() + + +@pytest.mark.unit +def test_teardown_closes_persistent_context(tmp_path, monkeypatch): + """The teardown must close the persistent context. 
Forgetting this + leaves Firefox + Playwright running until the parent process exits, + which on long-running tools (job orchestrators, MCP servers) leaks + handles indefinitely.""" + from unittest.mock import MagicMock + monkeypatch.setattr("invisible_playwright.launcher.ensure_binary", + lambda: tmp_path / "firefox") + fake_ctx = MagicMock(name="persistent_context") + fake_pw = MagicMock() + fake_pw.start.return_value.firefox.launch_persistent_context.return_value = fake_ctx + monkeypatch.setattr("invisible_playwright.launcher.sync_playwright", + lambda: fake_pw) + + obj = InvisiblePlaywright(seed=42, profile_dir=tmp_path / "p") + obj.__enter__() + obj.__exit__(None, None, None) + fake_ctx.close.assert_called_once()