From ee0fe57ced85454322cda9ad13746d4a966df8d5 Mon Sep 17 00:00:00 2001 From: Federico <85809106+feder-cr@users.noreply.github.com> Date: Thu, 28 May 2026 17:05:22 -0700 Subject: [PATCH] feat: public config helpers for third-party integrations (#25) Adds invisible_playwright.config module with: - get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display) -> dict - get_default_args() -> list Both also re-exported at the package root alongside the existing InvisiblePlaywright. ensure_binary is also re-exported there for parity with the cloakbrowser.download.ensure_binary integration pattern that downstream projects (Skyvern PR #5340, crawlee-python PR #1794, agno PR #8129) already expect. These helpers let third-party fetchers (changedetection.io plugins, Crawlee BrowserPool subclasses, agno toolkits) drive playwright.firefox.launch(executable_path=..., firefox_user_prefs=...) themselves without depending on the InvisiblePlaywright context manager owning the lifecycle. Same seed semantics, same humanize toggle, same extra_prefs overlay as the existing wrapper. Tests: tests/unit/test_config_public.py adds 14 unit tests covering deterministic seed, locale/timezone/pin/extra_prefs/humanize variations, and round-trip via the public namespace. Full unit suite (392 tests) stays green. Backwards compatible: InvisiblePlaywright surface is unchanged. BINARY_VERSION stays at firefox-7. Python-only release. 
--- CHANGELOG.md | 12 +++ README.md | 19 ++++ pyproject.toml | 2 +- src/invisible_playwright/__init__.py | 14 ++- src/invisible_playwright/config.py | 98 +++++++++++++++++++++ tests/unit/test_config_public.py | 125 +++++++++++++++++++++++++++ 6 files changed, 267 insertions(+), 3 deletions(-) create mode 100644 src/invisible_playwright/config.py create mode 100644 tests/unit/test_config_public.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e5836b0..2bf26b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ## [Unreleased] +## [0.2.0] - 2026-05-28 + +### Added +- Public config helpers in `invisible_playwright.config`: `get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display)` returns a complete `firefox_user_prefs` dict; `get_default_args()` returns the baseline CLI args list (currently empty). Both also re-exported at the package root. +- `invisible_playwright.ensure_binary` re-exported at the package root for parity with the `cloakbrowser.download.ensure_binary` integration pattern that downstream projects (Skyvern, Crawlee, agno) already expect. +- These helpers let third-party fetchers (changedetection.io plugins, Crawlee `BrowserPool` subclasses, agno toolkits) drive `playwright.firefox.launch(executable_path=..., firefox_user_prefs=...)` themselves without depending on the `InvisiblePlaywright` context manager owning the lifecycle. +- `tests/unit/test_config_public.py`: 14 unit tests covering deterministic seed, locale / timezone / pin / extra_prefs / humanize variations, and round-trip via the public namespace. + +### Unchanged +- `InvisiblePlaywright` context manager surface is identical (backwards compatible). +- `BINARY_VERSION` stays at `firefox-7`. Python-only release; no new Firefox build. 
+ ## [0.1.8] - 2026-05-23 ### Fixed diff --git a/README.md b/README.md index 3fa95b3..7629a68 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,25 @@ invisible_playwright version # wrapper and binary versions invisible_playwright clear-cache # remove all cached binaries ``` +## Public API for downstream integrations + +When you're building a third-party fetcher (a Crawlee `BrowserPool` subclass, a changedetection.io plugin, an agno toolkit, a Skyvern backend) and need to own the browser lifecycle yourself, use the public helpers instead of `InvisiblePlaywright`: + +```python +from playwright.async_api import async_playwright +from invisible_playwright import ensure_binary, get_default_stealth_prefs + +async with async_playwright() as p: + browser = await p.firefox.launch( + executable_path=str(ensure_binary()), + firefox_user_prefs=get_default_stealth_prefs(seed=42), + ) +``` + +`get_default_stealth_prefs(seed, *, pin, locale, timezone, extra_prefs, humanize, virtual_display)` returns the same dict that `InvisiblePlaywright(seed=..., locale=..., ...)` would inject. Same deterministic seed semantics, same humanize toggle, same `extra_prefs` overlay. `ensure_binary()` downloads the patched Firefox on first call and returns its absolute path. + +For everyday Python usage the `InvisiblePlaywright` context manager is still the recommended entry point. + ## Related projects invisible_playwright takes a different angle than the major Firefox-hardening projects but stands on their shoulders: diff --git a/pyproject.toml b/pyproject.toml index a86f4e9..4800e6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "invisible-playwright" -version = "0.1.9" +version = "0.2.0" description = "Playwright wrapper for a patched Firefox with deterministic stealth profile." 
readme = "README.md" requires-python = ">=3.11" diff --git a/src/invisible_playwright/__init__.py b/src/invisible_playwright/__init__.py index 5ce6fc6..f93def7 100644 --- a/src/invisible_playwright/__init__.py +++ b/src/invisible_playwright/__init__.py @@ -15,8 +15,10 @@ Quickstart: page = browser.new_page() page.click("#submit") # expanded into a Bezier trajectory """ -from .launcher import InvisiblePlaywright +from .config import get_default_args, get_default_stealth_prefs from .constants import BINARY_VERSION, FIREFOX_UPSTREAM_VERSION +from .download import ensure_binary +from .launcher import InvisiblePlaywright from importlib.metadata import PackageNotFoundError, version as _pkg_version @@ -27,4 +29,12 @@ except PackageNotFoundError: # marker rather than risk shipping a stale hardcoded string. __version__ = "0.0.0+unknown" -__all__ = ["InvisiblePlaywright", "BINARY_VERSION", "FIREFOX_UPSTREAM_VERSION", "__version__"] +__all__ = [ + "InvisiblePlaywright", + "ensure_binary", + "get_default_stealth_prefs", + "get_default_args", + "BINARY_VERSION", + "FIREFOX_UPSTREAM_VERSION", + "__version__", +] diff --git a/src/invisible_playwright/config.py b/src/invisible_playwright/config.py new file mode 100644 index 0000000..9d1a80b --- /dev/null +++ b/src/invisible_playwright/config.py @@ -0,0 +1,98 @@ +"""Public helpers for building Firefox launch config without using ``InvisiblePlaywright``. + +Use these when you need to call ``playwright.firefox.launch()`` (or +``firefox.launch_persistent_context()``) directly with our patched binary +and stealth prefs, instead of using the ``InvisiblePlaywright`` context +manager. 
def _humanize_prefs(humanize: Union[bool, float]) -> Dict[str, Any]:
    """Translate the ``humanize`` knob into its ``invisible_playwright.*`` prefs.

    Returns the on/off toggle and, when humanization is enabled, the
    ``maxTime`` cap in seconds (stored as a string).
    """
    enabled = bool(humanize)
    result: Dict[str, Any] = {"invisible_playwright.humanize": enabled}
    if enabled:
        # ``True`` selects the 1.5 s default; any other truthy number is the
        # cap itself. bool is tested explicitly because it subclasses int.
        cap = 1.5 if isinstance(humanize, bool) else float(humanize)
        result["invisible_playwright.humanize.maxTime"] = str(cap)
    return result


def get_default_stealth_prefs(
    seed: Optional[int] = None,
    *,
    pin: Optional[Dict[str, Any]] = None,
    locale: str = "en-US",
    timezone: str = "",
    extra_prefs: Optional[Dict[str, Any]] = None,
    humanize: Union[bool, float] = True,
    virtual_display: bool = False,
) -> Dict[str, Any]:
    """Build a complete ``firefox_user_prefs`` dict for ``firefox.launch()``.

    Use this when you need to drive ``playwright.firefox.launch()`` yourself
    instead of going through the ``InvisiblePlaywright`` context manager.

    Args:
        seed: Integer seed for the Bayesian fingerprint sampler. Same seed
            produces the same fingerprint. ``None`` generates a fresh
            random 31-bit integer via ``secrets.randbits(31)``.
        pin: Optional dict forcing specific fingerprint fields while the
            rest stays seed-derived. See ``docs/pinning.md``.
        locale: BCP-47 tag (e.g. ``"en-US"``). Drives ``Accept-Language``
            and ``navigator.language``.
        timezone: IANA timezone (e.g. ``"America/New_York"``). Empty means
            use the host TZ.
        extra_prefs: Optional dict overlaid LAST onto the generated prefs,
            including the ``invisible_playwright.humanize*`` keys derived
            from ``humanize``.
        humanize: When True (default), every mouse move is expanded into
            a Bezier trajectory by the patched Juggler. A float caps the
            motion in seconds. False (or any falsy number) disables it.
        virtual_display: When True on Windows, apply GPU-disabling prefs
            to prevent GPU process crashes on virtual desktops without
            D3D11 backend.

    Returns:
        Dict ready to pass as ``firefox_user_prefs=`` to
        ``playwright.firefox.launch()`` or ``launch_persistent_context()``.
    """
    resolved_seed = int(seed) if seed is not None else secrets.randbits(31)
    profile = generate_profile(resolved_seed, pin=pin)
    prefs = translate_profile_to_prefs(
        profile,
        locale=locale,
        timezone=timezone,
        extra_prefs=extra_prefs,
        virtual_display=virtual_display,
    )

    humanize_prefs = _humanize_prefs(humanize)
    prefs.update(humanize_prefs)

    # The humanize keys are written after translate_profile_to_prefs() has
    # handled ``extra_prefs``, so without this step a caller override of
    # those keys would be silently clobbered, contradicting the documented
    # "overlaid LAST" contract. Only the keys written just above are
    # re-applied; every other key is left exactly as the translator built it.
    # NOTE(review): confirm InvisiblePlaywright applies the same precedence.
    if extra_prefs:
        for key in humanize_prefs:
            if key in extra_prefs:
                prefs[key] = extra_prefs[key]
    return prefs


def get_default_args() -> List[str]:
    """Return the default Firefox CLI args to pass via ``args=``.

    Currently an empty list, since all stealth configuration is delivered
    via ``firefox_user_prefs`` rather than CLI flags. Exposed for parity
    with the ``cloakbrowser.config.get_default_stealth_args`` pattern and
    to future-proof integrations that already wire
    ``args=[*existing, *get_default_args()]``.

    A new list is created on every call, so callers may mutate the result.
    """
    return []
"""Unit tests for the public ``config`` helpers."""

import pytest

from invisible_playwright import (
    ensure_binary,
    get_default_args,
    get_default_stealth_prefs,
)
from invisible_playwright.config import get_default_stealth_prefs as _direct
from invisible_playwright.download import ensure_binary as _direct_eb


pytestmark = pytest.mark.unit


def test_get_default_args_is_empty_list():
    """Currently no baseline CLI args, but must return a list (mutable, fresh each call)."""
    args = get_default_args()
    assert args == []
    assert isinstance(args, list)
    args.append("--foo")
    # next call must return a fresh empty list, not the mutated one
    assert get_default_args() == []


def test_get_default_stealth_prefs_random_seed_returns_dict():
    """No seed -> fresh random fingerprint, dict has expected stealth keys."""
    prefs = get_default_stealth_prefs()
    assert isinstance(prefs, dict)
    assert len(prefs) > 0
    # humanize toggle is always set explicitly
    assert "invisible_playwright.humanize" in prefs
    assert prefs["invisible_playwright.humanize"] is True


def test_get_default_stealth_prefs_seed_is_deterministic():
    """Same seed -> identical prefs across calls."""
    a = get_default_stealth_prefs(seed=42)
    b = get_default_stealth_prefs(seed=42)
    assert a == b


def test_get_default_stealth_prefs_different_seeds_differ():
    """Different seeds -> different prefs."""
    a = get_default_stealth_prefs(seed=1)
    b = get_default_stealth_prefs(seed=2)
    assert a != b


def test_humanize_false_disables_prefs():
    """humanize=False leaves the maxTime knob unset and flips the toggle to False."""
    prefs = get_default_stealth_prefs(seed=42, humanize=False)
    assert prefs["invisible_playwright.humanize"] is False
    assert "invisible_playwright.humanize.maxTime" not in prefs


def test_humanize_default_sets_max_time_1_5():
    """humanize=True -> default maxTime is 1.5s, stored as string."""
    prefs = get_default_stealth_prefs(seed=42, humanize=True)
    assert prefs["invisible_playwright.humanize"] is True
    assert prefs["invisible_playwright.humanize.maxTime"] == "1.5"


def test_humanize_float_overrides_max_time():
    """Float for humanize is the explicit cap in seconds."""
    prefs = get_default_stealth_prefs(seed=42, humanize=3.0)
    assert prefs["invisible_playwright.humanize"] is True
    assert prefs["invisible_playwright.humanize.maxTime"] == "3.0"


def test_extra_prefs_overlay_takes_precedence():
    """A key supplied only via extra_prefs shows up verbatim in the result."""
    prefs = get_default_stealth_prefs(
        seed=42, extra_prefs={"some.custom.pref": 999}
    )
    assert prefs["some.custom.pref"] == 999


def test_extra_prefs_can_override_baseline():
    """A key in extra_prefs that also exists in baseline gets overridden."""
    baseline = get_default_stealth_prefs(seed=42)
    # Iterating a dict yields its keys; any baseline key will do here.
    a_baseline_key = next(iter(baseline))
    overridden = get_default_stealth_prefs(
        seed=42, extra_prefs={a_baseline_key: "OVERRIDDEN_SENTINEL"}
    )
    assert overridden[a_baseline_key] == "OVERRIDDEN_SENTINEL"


def test_locale_argument_changes_prefs():
    """Different locales produce different prefs (Accept-Language affected)."""
    en = get_default_stealth_prefs(seed=42, locale="en-US")
    it = get_default_stealth_prefs(seed=42, locale="it-IT")
    assert en != it


def test_timezone_argument_changes_prefs():
    """Different timezones produce different prefs."""
    ny = get_default_stealth_prefs(seed=42, timezone="America/New_York")
    rome = get_default_stealth_prefs(seed=42, timezone="Europe/Rome")
    assert ny != rome


def test_pin_argument_forces_specific_fields():
    """Pinning a field changes the output relative to the plain seed build."""
    plain = get_default_stealth_prefs(seed=42)
    pinned = get_default_stealth_prefs(
        seed=42, pin={"hardware.concurrency": 999}
    )
    # something in the dict must differ vs the plain seed=42 build
    assert plain != pinned


def test_public_import_matches_direct_import():
    """Top-level re-export is the very same function as the module-level one."""
    # Identity is the real re-export guarantee; equal output alone could also
    # be produced by an accidental copy of the implementation.
    assert get_default_stealth_prefs is _direct
    a = get_default_stealth_prefs(seed=42)
    b = _direct(seed=42)
    assert a == b


def test_ensure_binary_is_callable_via_public_namespace():
    """ensure_binary is re-exported and stays callable from the package root."""
    # We don't invoke it (would trigger a network download in CI) — just
    # verify the public attribute is the same callable as the underlying.
    assert callable(ensure_binary)
    assert ensure_binary is _direct_eb