diff --git a/tests/test_constants.py b/tests/test_constants.py index fcdeed9..8d124a7 100644 --- a/tests/test_constants.py +++ b/tests/test_constants.py @@ -1,6 +1,13 @@ import pytest -from invisible_playwright.constants import ARCHIVE_NAME, BINARY_BASENAME, BINARY_VERSION +from invisible_playwright.constants import ( + ARCHIVE_NAME, + BINARY_BASENAME, + BINARY_ENTRY_REL, + BINARY_VERSION, + FIREFOX_UPSTREAM_VERSION, + RELEASE_URL_TEMPLATE, +) @pytest.mark.unit @@ -33,3 +40,143 @@ def test_archive_name_unsupported_raises(): def test_binary_basename_format(): assert "firefox" in BINARY_BASENAME.lower() assert "stealth" in BINARY_BASENAME.lower() + + +# ---- Comprehensive ARCHIVE_NAME edge cases -------------------------------- # +# Same risk shape as bug #15: a missed format assumption (sha256sum binary +# mode) silently produced wrong output. The same class of bug here would be an +# uppercase platform string or an odd machine value passing through to a +# wrong-named asset on the CDN and 404-ing. + +@pytest.mark.unit +@pytest.mark.parametrize("platform_key,machine,expected_substring", [ + ("win32", "AMD64", "win-x86_64.zip"), # Windows reports AMD64 + ("win32", "amd64", "win-x86_64.zip"), # lowercase variant + ("win32", "x86_64", "win-x86_64.zip"), # mingw-style + ("linux", "x86_64", "linux-x86_64.tar.gz"), # standard Linux + ("linux", "AMD64", "linux-x86_64.tar.gz"), # odd but plausible + ("Linux", "x86_64", "linux-x86_64.tar.gz"), # case-insensitive platform + ("WIN32", "AMD64", "win-x86_64.zip"), # ALL CAPS platform +]) +def test_archive_name_accepts_case_variations(platform_key, machine, expected_substring): + """sys.platform / platform.machine() return inconsistent casing across + OS versions and Python versions. 
The asset filename must be stable + regardless — otherwise the CDN 404s. Only the suffix is asserted.""" + assert ARCHIVE_NAME(platform_key, machine).endswith(expected_substring) + + +@pytest.mark.unit +@pytest.mark.parametrize("machine", ["i386", "i686", "ppc64le", "armv7l", "riscv64"]) +def test_archive_name_rejects_unsupported_arches(machine): + """Unsupported arches must raise NotImplementedError with the bad value + in the message — silent fallback to a default arch would download the + wrong binary, run, and fingerprint differently.""" + with pytest.raises(NotImplementedError, match=machine): + ARCHIVE_NAME("linux", machine) + + +@pytest.mark.unit +@pytest.mark.parametrize("machine", ["arm64", "aarch64"]) +def test_archive_name_arm64_not_yet_supported(machine): + """ARM64 is a frequent request (issue #6). Until binaries exist for it, + ARCHIVE_NAME should hard-fail rather than silently degrade. If this test + starts failing because someone shipped ARM64 builds, replace it with the + positive case.""" + with pytest.raises(NotImplementedError): + ARCHIVE_NAME("linux", machine) + + +@pytest.mark.unit +@pytest.mark.parametrize("platform_key", ["darwin", "freebsd", "cygwin", "openbsd"]) +def test_archive_name_rejects_unsupported_platforms(platform_key): + """Non-Linux/non-Windows platforms must raise NotImplementedError naming + the platform, not silently pick one of the two supported ones.""" + with pytest.raises(NotImplementedError, match=platform_key): + ARCHIVE_NAME(platform_key, "x86_64") + + +# ---- ARCHIVE_NAME ↔ BINARY_ENTRY_REL invariant ---------------------------- # +# For every supported platform there MUST be an entry in BINARY_ENTRY_REL, +# otherwise ensure_binary() will raise NotImplementedError AFTER having +# already downloaded a 110 MB tarball — terrible UX. 
+ +@pytest.mark.unit +def test_binary_entry_rel_covers_every_supported_platform(): + """If ARCHIVE_NAME accepts a platform key, BINARY_ENTRY_REL must declare + where the executable lives inside the archive for it.""" + for plat in ["win32", "linux"]: + ARCHIVE_NAME(plat, "x86_64") # must not raise + assert plat in BINARY_ENTRY_REL, ( + f"ARCHIVE_NAME accepts {plat!r} but BINARY_ENTRY_REL has no entry " + f"— ensure_binary() will fail late after a 110 MB download." + ) + + +@pytest.mark.unit +def test_binary_entry_rel_extension_matches_platform(): + """A `.exe` entry on Windows, exactly plain `firefox` on Linux.""" + assert BINARY_ENTRY_REL["win32"].endswith(".exe") + assert not BINARY_ENTRY_REL["linux"].endswith(".exe") + assert BINARY_ENTRY_REL["linux"] == "firefox" + + +# ---- RELEASE_URL_TEMPLATE shape ------------------------------------------- # + +@pytest.mark.unit +def test_release_url_template_is_https(): + """Must be https:// on github.com. GitHub redirects http but we never accept the redirect.""" + assert RELEASE_URL_TEMPLATE.startswith("https://github.com/") + + +@pytest.mark.unit +def test_release_url_template_has_required_placeholders(): + """{tag} and {asset} must both be present, otherwise _resolve_asset_url + won't format a usable URL and downloads fail with confusing 404s.""" + assert "{tag}" in RELEASE_URL_TEMPLATE + assert "{asset}" in RELEASE_URL_TEMPLATE + + +@pytest.mark.unit +def test_release_url_template_formats_cleanly(): + """Confirm .format() actually substitutes — catches typos like {tags}.""" + url = RELEASE_URL_TEMPLATE.format(tag="firefox-99", asset="thing.zip") + assert "{" not in url and "}" not in url + assert "firefox-99" in url + assert "thing.zip" in url + + +@pytest.mark.unit +def test_release_url_points_at_owned_repo(): + """The template MUST point at an owner/repo the maintainer actually + controls. 
A typo here would direct everyone's downloads at a stranger's + GitHub account — silent supply-chain risk.""" + assert "/feder-cr/invisible_playwright/" in RELEASE_URL_TEMPLATE, ( + f"RELEASE_URL_TEMPLATE was changed to point elsewhere: " + f"{RELEASE_URL_TEMPLATE!r}. Update this test only if the move is intentional." + ) + + +# ---- Firefox upstream version sanity (>= 2 dotted numeric parts) ---------- # + +@pytest.mark.unit +def test_firefox_upstream_version_is_three_part_semver(): + parts = FIREFOX_UPSTREAM_VERSION.split(".") + assert len(parts) >= 2, f"version too short: {FIREFOX_UPSTREAM_VERSION!r}" + for p in parts: + assert p.isdigit(), f"non-numeric segment in {FIREFOX_UPSTREAM_VERSION!r}" + + +@pytest.mark.unit +def test_binary_basename_includes_upstream_version(): + """The basename references the upstream version, so the asset filename + on the CDN encodes which Firefox was patched. Bumping FIREFOX_UPSTREAM_VERSION + without rebuilding would leave stale binaries — this guards against + accidentally desyncing the two.""" + assert FIREFOX_UPSTREAM_VERSION in BINARY_BASENAME + + +@pytest.mark.unit +@pytest.mark.parametrize("plat", ["win32", "linux"]) +def test_archive_name_includes_upstream_version(plat): + """Desync guard: the archive name must embed the upstream version too.""" + assert FIREFOX_UPSTREAM_VERSION in ARCHIVE_NAME(plat, "x86_64") diff --git a/tests/test_download.py b/tests/test_download.py index 8e15406..b32dced 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -4,15 +4,20 @@ import tarfile from pathlib import Path import pytest +import requests import responses -from invisible_playwright.constants import BINARY_VERSION +from invisible_playwright.constants import BINARY_VERSION, RELEASE_URL_TEMPLATE from invisible_playwright.download import ( + _download_file, _extract, _github_token, _parse_checksums, _parse_owner_repo, + _resolve_asset_url, _sha256_file, + cache_dir_for_version, + cache_root, ensure_binary, ) @@ -161,6 +166,166 @@ def 
test_parse_checksums_uses_last_token_as_filename(): assert "some/nested/file.zip" in out +# DL3 regression — issue #15 (LostBoxArt). +# GNU coreutils `sha256sum -b` (and `shasum -b`) print filenames in binary mode +# with a leading `*`: "hash *filename". The parser used parts[-1] verbatim +# so the key became "*filename" and lookups by bare filename returned None, +# raising `RuntimeError: no SHA256 for {asset}` on every first-time fetch. +@pytest.mark.unit +def test_parse_checksums_strips_star_prefix_binary_mode(): + """`sha256sum -b` format: one space, then `*` directly before the filename.""" + text = "abc123 *firefox.tar.gz\n" + out = _parse_checksums(text) + assert out == {"firefox.tar.gz": "abc123"}, ( + "binary-mode '*' prefix must be stripped from the filename key" + ) + + +@pytest.mark.unit +def test_parse_checksums_handles_mixed_binary_and_text_mode(): + """A single checksums.txt with one binary-mode line and one text-mode line. + Both keys must be normalized (no `*` prefix) so consumers can use the bare + filename as the lookup key regardless of how each line was produced.""" + text = ( + "aaa111 *firefox-win.zip\n" + "bbb222  firefox-linux.tar.gz\n" + ) + out = _parse_checksums(text) + assert out == {"firefox-win.zip": "aaa111", "firefox-linux.tar.gz": "bbb222"} + + +@pytest.mark.unit +def test_parse_checksums_handles_multiple_leading_stars(): + """`.lstrip("*")` strips any run of leading asterisks. Not a real sha256sum + format but defensive — guarantees no `*` survives in any key.""" + text = "abc123 **doubled.zip\n" + out = _parse_checksums(text) + assert "doubled.zip" in out + assert "**doubled.zip" not in out + + +@pytest.mark.unit +def test_parse_checksums_handles_crlf_line_endings(): + """sha256sum.exe on Windows writes CRLF. 
The .strip() on each line should + consume the \\r so the key doesn't end up as 'firefox.zip\\r'.""" + text = "abc123 *firefox.zip\r\ndef456 other.tar.gz\r\n" + out = _parse_checksums(text) + assert out == {"firefox.zip": "abc123", "other.tar.gz": "def456"} + + +@pytest.mark.unit +def test_parse_checksums_handles_utf8_bom_at_start(): + """Some Windows tools prepend a UTF-8 BOM. The first line shouldn't be lost.""" + text = "\ufeffabc123 *firefox.zip\n" + out = _parse_checksums(text) + # The BOM stays attached to the hash field as a non-fatal artifact; + # what matters is that the FILENAME key is parsed and normalized. + keys = list(out.keys()) + assert "firefox.zip" in keys, f"BOM caused first line to be lost: keys={keys}" + + +@pytest.mark.unit +def test_parse_checksums_handles_indented_lines(): + """Leading whitespace on a data line must not break parsing.""" + text = " abc123 *indented.zip\n" + out = _parse_checksums(text) + assert out == {"indented.zip": "abc123"} + + +@pytest.mark.unit +def test_parse_checksums_handles_trailing_whitespace(): + """Trailing spaces on a line shouldn't end up in the key.""" + text = "abc123 *trailing.zip \n" + out = _parse_checksums(text) + # After .strip() the trailing spaces are gone, so the key is clean + assert out == {"trailing.zip": "abc123"} + + +@pytest.mark.unit +def test_parse_checksums_real_world_sha256sum_b_output(tmp_path): + """Hash a real file with hashlib and build the line `sha256sum -b` would + print for it, then verify the parser handles that output verbatim. 
+ + We can't depend on sha256sum being on PATH on Windows, so we synthesize + the exact byte sequence that GNU coreutils 9.x produces.""" + fake_archive = tmp_path / "release.tar.gz" + fake_archive.write_bytes(b"some content") + sha = hashlib.sha256(fake_archive.read_bytes()).hexdigest() + # Exact format coreutils prints in binary mode (`-b`): + # "hash *filename\n" + coreutils_output = f"{sha} *{fake_archive.name}\n" + + out = _parse_checksums(coreutils_output) + assert out == {"release.tar.gz": sha} + + +@pytest.mark.unit +def test_parse_checksums_text_mode_two_space_separator(): + """`sha256sum --text` format uses two spaces. Must also parse cleanly + and the key must be identical to the binary-mode case.""" + text = "abc123  textmode.zip\n" + out = _parse_checksums(text) + assert out == {"textmode.zip": "abc123"} + + +@pytest.mark.unit +def test_parse_checksums_empty_file_returns_empty_dict(): + assert _parse_checksums("") == {} + assert _parse_checksums("\n\n\n") == {} + assert _parse_checksums(" \n\t\n") == {} + + +@pytest.mark.unit +def test_parse_checksums_all_comment_file_returns_empty_dict(): + """A file with only comments shouldn't crash and shouldn't produce keys.""" + text = "# generated by release script\n# 2026-05-20\n" + assert _parse_checksums(text) == {} + + +# DL3 regression — full integration via ensure_binary: confirm the parser +# bug from #15 cannot regress when the live release format is mimicked exactly. 
+@pytest.mark.unit +@responses.activate +def test_ensure_binary_accepts_binary_mode_checksums(tmp_path, monkeypatch): + """Reproduce the EXACT format the GitHub release ships: + hash *asset-filename + Before the #15 fix this raised + RuntimeError: no SHA256 for {asset} in checksums.txt + even though the asset and SHA were both present.""" + cache = tmp_path / "cache" + monkeypatch.setattr("invisible_playwright.download.cache_root", lambda: cache) + + archive_path = tmp_path / "archive.zip" + archive_bytes = _make_zip(archive_path, "firefox.exe", b"PEX!") + archive_sha = hashlib.sha256(archive_bytes).hexdigest() + from invisible_playwright.constants import ARCHIVE_NAME + asset = ARCHIVE_NAME("win32", "AMD64") + + url_archive = ( + f"https://github.com/feder-cr/invisible_playwright/releases/download/" + f"{BINARY_VERSION}/{asset}" + ) + url_sums = ( + f"https://github.com/feder-cr/invisible_playwright/releases/download/" + f"{BINARY_VERSION}/checksums.txt" + ) + + responses.add(responses.GET, url_archive, body=archive_bytes, status=200, + content_type="application/zip") + # Binary-mode format (note the `*`): regression sentinel for #15. + responses.add( + responses.GET, url_sums, + body=f"{archive_sha} *{asset}\n", + status=200, + ) + + # Force the platform branch the test mocks: + monkeypatch.setattr("sys.platform", "win32") + out = ensure_binary() + # No RuntimeError means the parser accepted the `*`-prefixed key. 
+ assert out.exists() + + # DL4: unknown archive format (.rar) raises RuntimeError @pytest.mark.unit def test_extract_unknown_format_raises(tmp_path): @@ -375,3 +540,295 @@ def test_ensure_binary_missing_entry_after_extract_raises_linux(tmp_path, monkey with pytest.raises(RuntimeError, match="binary not found after extraction"): ensure_binary() + + +# ========================================================================== # +# _resolve_asset_url — public-repo direct URL vs private-repo API resolution +# ========================================================================== # +# This function chooses between two code paths based on whether a GitHub +# token is set. Both paths produce a downloadable URL but via different +# mechanisms, and a regression here would surface as 404 / 403 / wrong +# binary downloaded. + +@pytest.mark.unit +def test_resolve_asset_url_public_returns_direct_url(monkeypatch): + """No token → return the direct releases/download URL verbatim.""" + monkeypatch.delenv("STEALTHFOX_GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + url = _resolve_asset_url("firefox-4", "thing.zip") + assert url == RELEASE_URL_TEMPLATE.format(tag="firefox-4", asset="thing.zip") + assert "api.github.com" not in url # public path must skip the API + + +@pytest.mark.unit +def test_resolve_asset_url_public_url_format_is_stable(monkeypatch): + """The exact URL shape is what GitHub clients have learned to cache. 
+ Changing it without bumping BINARY_VERSION would 404 on first fetch + for every existing user — guard against accidental drift.""" + monkeypatch.delenv("STEALTHFOX_GITHUB_TOKEN", raising=False) + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + url = _resolve_asset_url("firefox-4", "abc.tar.gz") + assert url == ( + "https://github.com/feder-cr/invisible_playwright/releases/" + "download/firefox-4/abc.tar.gz" + ) + + +@pytest.mark.unit +@responses.activate +def test_resolve_asset_url_private_uses_api_with_token(monkeypatch): + """Token set → hit the API and return the asset.url (which 302s with + Accept: application/octet-stream). The direct release URL would 404 + for a private repo even with the token in headers.""" + monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_fake") + monkeypatch.delenv("GITHUB_TOKEN", raising=False) + + api_url = ( + "https://api.github.com/repos/feder-cr/invisible_playwright" + "/releases/tags/firefox-4" + ) + responses.add( + responses.GET, api_url, + json={"assets": [ + {"name": "other.zip", "url": "https://api.github.com/.../1"}, + {"name": "wanted.zip", "url": "https://api.github.com/.../2"}, + ]}, + status=200, + ) + url = _resolve_asset_url("firefox-4", "wanted.zip") + assert url == "https://api.github.com/.../2" + + +@pytest.mark.unit +@responses.activate +def test_resolve_asset_url_private_raises_when_asset_missing(monkeypatch): + """If the asset name isn't on the release, raise — better to fail fast + with the asset name in the message than to download something else.""" + monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_fake") + api_url = ( + "https://api.github.com/repos/feder-cr/invisible_playwright" + "/releases/tags/firefox-4" + ) + responses.add( + responses.GET, api_url, + json={"assets": [{"name": "other.zip", "url": "x"}]}, + status=200, + ) + with pytest.raises(RuntimeError, match="not-here.zip"): + _resolve_asset_url("firefox-4", "not-here.zip") + + +@pytest.mark.unit +@responses.activate +def 
test_resolve_asset_url_private_propagates_api_4xx(monkeypatch): + """If the API returns 404 (release doesn't exist) or 401 (bad token), + don't swallow it silently — raise so the user sees the real reason.""" + monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_fake") + api_url = ( + "https://api.github.com/repos/feder-cr/invisible_playwright" + "/releases/tags/firefox-99" + ) + responses.add(responses.GET, api_url, status=404) + with pytest.raises(requests.HTTPError): + _resolve_asset_url("firefox-99", "thing.zip") + + +@pytest.mark.unit +@responses.activate +def test_resolve_asset_url_private_sends_auth_header(monkeypatch): + """The API call MUST include `Authorization: token <...>`, otherwise + a private repo returns 404 and the user thinks the release is missing.""" + monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_secret") + api_url = ( + "https://api.github.com/repos/feder-cr/invisible_playwright" + "/releases/tags/firefox-4" + ) + + captured = {} + def callback(request): + captured["auth"] = request.headers.get("Authorization") + return (200, {}, '{"assets":[{"name":"x.zip","url":"https://x/y"}]}') + responses.add_callback(responses.GET, api_url, callback=callback, + content_type="application/json") + _resolve_asset_url("firefox-4", "x.zip") + assert captured["auth"] == "token ghp_secret" + + +# ========================================================================== # +# _download_file — file streaming + error propagation +# ========================================================================== # + +@pytest.mark.unit +@responses.activate +def test_download_file_writes_full_payload_to_disk(tmp_path): + """A 200 OK returns the full body; the file on disk matches byte-for-byte.""" + url = "https://example.com/some-large.bin" + payload = bytes(range(256)) * 1024 # 256 KB, varied bytes + responses.add(responses.GET, url, body=payload, status=200) + + dst = tmp_path / "downloaded.bin" + _download_file(url, dst) + assert dst.exists() + assert dst.read_bytes() 
== payload + + +@pytest.mark.unit +@responses.activate +def test_download_file_creates_parent_directories(tmp_path): + """The dst's parent may not exist yet — _download_file is expected to + mkdir -p before writing. Without this, the first fetch on a clean + machine raises FileNotFoundError because the cache dir doesn't exist.""" + url = "https://example.com/x.bin" + responses.add(responses.GET, url, body=b"data", status=200) + + deep = tmp_path / "a" / "b" / "c" / "x.bin" + _download_file(url, deep) + assert deep.exists() + assert deep.read_bytes() == b"data" + + +@pytest.mark.unit +@responses.activate +def test_download_file_propagates_http_404(tmp_path): + """404s from the CDN must raise — silent 404 → empty file → SHA mismatch + is a much worse failure mode.""" + url = "https://example.com/missing.bin" + responses.add(responses.GET, url, status=404) + with pytest.raises(requests.HTTPError): + _download_file(url, tmp_path / "out.bin") + + +@pytest.mark.unit +@responses.activate +def test_download_file_propagates_http_500(tmp_path): + """Server errors must surface, not be swallowed as 'empty download'.""" + url = "https://example.com/broken.bin" + responses.add(responses.GET, url, status=500) + with pytest.raises(requests.HTTPError): + _download_file(url, tmp_path / "out.bin") + + +@pytest.mark.unit +@responses.activate +def test_download_file_adds_auth_for_api_urls(monkeypatch, tmp_path): + """When downloading from api.github.com (private-repo flow), the + request MUST include `Authorization: token <...>` and + `Accept: application/octet-stream` — otherwise the API returns the + asset JSON instead of the binary.""" + monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_secret") + url = "https://api.github.com/repos/x/y/releases/assets/123" + + captured = {} + def callback(request): + captured["auth"] = request.headers.get("Authorization") + captured["accept"] = request.headers.get("Accept") + return (200, {}, b"BIN!") + responses.add_callback(responses.GET, url, 
callback=callback) + + _download_file(url, tmp_path / "out.bin") + assert captured["auth"] == "token ghp_secret" + assert captured["accept"] == "application/octet-stream" + + +@pytest.mark.unit +@responses.activate +def test_download_file_does_not_send_auth_for_non_api_urls(monkeypatch, tmp_path): + """Public-repo flow hits github.com/.../releases/download/... directly. + Sending an auth header to that URL is unnecessary and would leak the + token in CDN access logs.""" + monkeypatch.setenv("STEALTHFOX_GITHUB_TOKEN", "ghp_secret") + url = "https://github.com/feder-cr/invisible_playwright/releases/download/firefox-4/x.zip" + + captured = {} + def callback(request): + captured["auth"] = request.headers.get("Authorization") + return (200, {}, b"BIN!") + responses.add_callback(responses.GET, url, callback=callback) + + _download_file(url, tmp_path / "out.bin") + assert captured["auth"] is None, ( + "Auth header leaked to a public CDN URL — would expose the token " + "in GitHub's access logs." 
+ ) + + +# ========================================================================== # +# cache_root + cache_dir_for_version — path resolution +# ========================================================================== # + +@pytest.mark.unit +def test_cache_root_returns_path(): + """Must return a Path, not a string — downstream code uses .mkdir() etc.""" + p = cache_root() + assert isinstance(p, Path) + + +@pytest.mark.unit +def test_cache_root_contains_package_name(): + """The cache dir should be identifiable as ours so users can `rm -rf` + it without nuking other tools' caches.""" + p = cache_root() + assert "invisible-playwright" in str(p).lower() + + +@pytest.mark.unit +def test_cache_dir_for_version_appends_version_segment(): + """Each binary version gets its own subdir so multiple versions can + coexist (useful for downgrade / A-B testing).""" + p = cache_dir_for_version("firefox-99") + assert p.name == "firefox-99" + assert p.parent == cache_root() + + +@pytest.mark.unit +def test_cache_dir_for_version_defaults_to_current_binary_version(): + """No-arg call uses the pinned BINARY_VERSION.""" + p = cache_dir_for_version() + assert p.name == BINARY_VERSION + + +@pytest.mark.unit +def test_cache_dir_isolation_between_versions(): + """firefox-3 and firefox-4 must NEVER share a directory — extraction + would clobber one with the other and break downgrade.""" + a = cache_dir_for_version("firefox-3") + b = cache_dir_for_version("firefox-4") + assert a != b + assert a.parent == b.parent # but they share the same root + + +# ========================================================================== # +# _parse_owner_repo — more edge cases +# ========================================================================== # + +@pytest.mark.unit +def test_parse_owner_repo_extracts_from_canonical_template(): + """Must work against the exact template stored in constants.py.""" + owner, repo = _parse_owner_repo(RELEASE_URL_TEMPLATE) + assert owner and repo # something 
extracted + assert "/" not in owner and "/" not in repo # no slashes in either segment + + +@pytest.mark.unit +@pytest.mark.parametrize("bad_template", [ + "http://github.com/x/y/releases/", # http, not https + "https://gitlab.com/x/y/releases/", # wrong host + "https://github.com/onlyone/releases/", # missing repo segment + "", # empty + "github.com/x/y/releases/", # missing scheme +]) +def test_parse_owner_repo_rejects_malformed_urls(bad_template): + """Any URL that doesn't match the canonical shape must raise — silent + None/empty extraction would build broken API URLs and confuse the user.""" + with pytest.raises(RuntimeError, match="cannot parse"): + _parse_owner_repo(bad_template) + + +@pytest.mark.unit +def test_parse_owner_repo_handles_repos_with_dashes_and_underscores(): + """Repo names with -, _, . are valid on GitHub; the regex must accept them.""" + owner, repo = _parse_owner_repo( + "https://github.com/my-org/my_cool.repo/releases/download/x/y.zip" + ) + assert owner == "my-org" + assert repo == "my_cool.repo" diff --git a/tests/test_release_e2e.py b/tests/test_release_e2e.py new file mode 100644 index 0000000..e024399 --- /dev/null +++ b/tests/test_release_e2e.py @@ -0,0 +1,253 @@ +"""End-to-end release tests. + +These exercise the FULL user install path against the LIVE GitHub release. +They are slow (download a ~110 MB binary, launch Firefox) and require network +access — marked `e2e` so they're excluded from the default suite. Run them +BEFORE announcing a release: + + pytest tests/test_release_e2e.py -m e2e -v + +Or to target a specific git revision (default is current HEAD on origin/main): + + INVPW_E2E_REV=v0.1.5 pytest tests/test_release_e2e.py -m e2e -v + +What each test verifies and why it exists: + + test_clean_install_from_git_main: + Spawns a fresh venv and pip-installs the wrapper from git HEAD. Confirms + the package has no broken metadata, missing deps, or import errors in a + pristine environment. 
Catches the "works on my machine because I already + have the dev deps" class of bug. + + test_fetch_against_live_release: + After the install, runs `python -m invisible_playwright fetch --force`, + which downloads the live tarball + checksums.txt for the pinned + BINARY_VERSION from the production GitHub release. This is THE test that + would have caught LostBoxArt's #15 — the checksums.txt parser bug only + manifested against the real binary-mode format the release ships, not + against unit-test mocks. + + test_version_command_reports_wrapper_and_binary: + Runs `python -m invisible_playwright --version` in the fresh venv and + checks that the output mentions a `firefox-` BINARY_VERSION tag. It + does not use the isolated cache and asserts nothing about the binary. + + test_playwright_launch_against_real_site (linux-only by default): + Launches the patched Firefox under the wrapper, navigates to a stable + public URL, and reads a known DOM property. This is the full stack: + wrapper init → Firefox launch → Juggler handshake → page.goto → + page.evaluate. If anything along the way regresses (Juggler protocol + schema drift, prefs typo, sandbox issue, …) this fails loudly. + +The tests use a temp cache dir per run (env var +`INVISIBLE_PLAYWRIGHT_CACHE_DIR`) so they never poison the developer's real +cache and never get false positives from a previously-cached binary. +""" +from __future__ import annotations + +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +import pytest + + +REPO_URL = "https://github.com/feder-cr/invisible_playwright.git" +REV = os.environ.get("INVPW_E2E_REV", "main") + + +# ---------- helpers --------------------------------------------------------- # + + +def _run(cmd: list[str], *, env: dict | None = None, cwd: Path | None = None, + timeout: int = 300, check: bool = True) -> subprocess.CompletedProcess: + """Run a subprocess with full output captured. 
Fail with both streams shown.""" + result = subprocess.run( + cmd, env=env, cwd=cwd, timeout=timeout, + capture_output=True, text=True, + ) + if check and result.returncode != 0: + raise AssertionError( + f"{' '.join(cmd)} exited {result.returncode}\n" + f"--- stdout ---\n{result.stdout[-3000:]}\n" + f"--- stderr ---\n{result.stderr[-3000:]}" + ) + return result + + +def _venv_python(venv: Path) -> Path: + if os.name == "nt": + return venv / "Scripts" / "python.exe" + return venv / "bin" / "python" + + +# ---------- fixtures -------------------------------------------------------- # + + +@pytest.fixture(scope="module") +def workspace() -> Path: + """Yield one temp dir shared by the whole module (so the venv and the + 110 MB download are created once); it is removed on teardown.""" + root = Path(tempfile.mkdtemp(prefix="invpw-e2e-")) + yield root + shutil.rmtree(root, ignore_errors=True) + + +@pytest.fixture(scope="module") +def clean_venv(workspace: Path) -> Path: + """A fresh venv, pip upgraded. 
Returns its python executable path.""" + venv_dir = workspace / "venv" + _run([sys.executable, "-m", "venv", str(venv_dir)], timeout=180) + py = _venv_python(venv_dir) + assert py.exists(), f"venv python not found at {py}" + _run([str(py), "-m", "pip", "install", "--upgrade", "pip", "--quiet"], timeout=180) + return py + + +@pytest.fixture(scope="module") +def isolated_cache_env(workspace: Path) -> dict: + """Environment dict pointing the wrapper at a private cache dir so this + test never reads or pollutes the developer's real cache.""" + cache = workspace / "cache" + cache.mkdir(exist_ok=True) + env = os.environ.copy() + env["INVISIBLE_PLAYWRIGHT_CACHE_DIR"] = str(cache) + env["XDG_CACHE_HOME"] = str(cache) + return env + + +# ---------- tests ----------------------------------------------------------- # + + +@pytest.mark.e2e +def test_clean_install_from_git_main(clean_venv: Path): + """The package installs cleanly from git+HTTPS in a pristine venv.""" + url = f"git+{REPO_URL}@{REV}" + _run([str(clean_venv), "-m", "pip", "install", url], timeout=600) + + # Importability check — catches missing __init__ exports, broken syntax, + # missing runtime deps. + out = _run( + [str(clean_venv), "-c", + "import invisible_playwright as ip; " + "print('OK', ip.__name__)"], + timeout=30, + ) + assert "OK invisible_playwright" in out.stdout + + +@pytest.mark.e2e +def test_version_command_reports_wrapper_and_binary(clean_venv: Path): + """`python -m invisible_playwright --version` runs and reports both the + wrapper version and the BINARY_VERSION it'll try to fetch.""" + out = _run( + [str(clean_venv), "-m", "invisible_playwright", "--version"], + timeout=30, + ) + text = out.stdout + out.stderr + assert "firefox-" in text, f"BINARY_VERSION not reported: {text!r}" + + +@pytest.mark.e2e +def test_fetch_against_live_release(clean_venv: Path, isolated_cache_env: dict): + """Hit the LIVE GitHub release: download tarball + checksums.txt, parse, + SHA256-verify, extract. 
This is the regression sentinel for #15. + + If checksums.txt is shipped in `*`-prefixed (binary) format and the parser + keeps the `*` in the key, this raises + RuntimeError: no SHA256 for {asset} in checksums.txt + """ + out = _run( + [str(clean_venv), "-m", "invisible_playwright", "fetch", "--force"], + env=isolated_cache_env, + timeout=900, # 110 MB download + extract on slow connections + ) + output = out.stdout + out.stderr + # Anti-regression for #15: this exact string would surface if the parser + # broke again. Spell it out so a future failure is grep-able to the issue. + assert "no SHA256 for" not in output, ( + "Issue #15 regression: parser couldn't find SHA for the asset.\n" + f"Output:\n{output[-2000:]}" + ) + assert "SHA256 mismatch" not in output, ( + "Tarball SHA doesn't match the published checksums.txt — " + "either the upload was corrupted or the release was re-packed " + "without updating checksums.txt." + ) + + +@pytest.mark.e2e +def test_binary_executes_after_fetch(clean_venv: Path, isolated_cache_env: dict): + """After fetch, the binary cache contains a launchable Firefox.""" + out = _run( + [str(clean_venv), "-c", + "from invisible_playwright.download import ensure_binary; " + "p = ensure_binary(); print('BINARY', p)"], + env=isolated_cache_env, + timeout=60, + ) + binary_line = [l for l in out.stdout.splitlines() if l.startswith("BINARY ")] + assert binary_line, f"ensure_binary() didn't print path: {out.stdout!r}" + binary_path = Path(binary_line[0].split(" ", 1)[1]) + assert binary_path.exists(), f"binary missing: {binary_path}" + + # The `firefox --version` banner is checked, not the exit code; stdout and + # stderr are merged since Windows reportedly prints nothing on stdout. + # A Linux binary on a Windows host is not launched (that would need WSL). + if os.name == "nt" and binary_path.suffix == "": + # Linux binary path on Windows host — skip launch, the previous + # ensure_binary() already proved cache landed correctly. 
+ pytest.skip("Cross-platform binary launch from Windows requires WSL.") + r = subprocess.run([str(binary_path), "--version"], + capture_output=True, text=True, timeout=30) + text = (r.stdout + r.stderr).lower() + assert "firefox" in text and "150." in text, ( + f"binary --version didn't report Firefox 150: rc={r.returncode} " + f"out={r.stdout!r} err={r.stderr!r}" + ) + + +@pytest.mark.e2e +@pytest.mark.linux_only +def test_playwright_launch_against_real_site(clean_venv: Path, + isolated_cache_env: dict): + """Full stack: launch the patched Firefox via the wrapper, navigate to a + real URL, evaluate JS. Catches Juggler protocol drift, profile-generation + bugs, locale handling regressions, prefs typos.""" + if sys.platform.startswith("win"): + pytest.skip("Headless launch path requires display server (skip on Win).") + + script = ( + "from invisible_playwright import InvisiblePlaywright\n" + "with InvisiblePlaywright(headless=True, seed=42) as browser:\n" + " ctx = browser.new_context()\n" + " page = ctx.new_page()\n" + " page.goto('https://example.com', timeout=30000)\n" + " title = page.title()\n" + " ua = page.evaluate('navigator.userAgent')\n" + " print('TITLE=' + title)\n" + " print('UA=' + ua)\n" + ) + out = _run([str(clean_venv), "-c", script], + env=isolated_cache_env, timeout=180) + assert "TITLE=Example Domain" in out.stdout, ( + f"page.title() didn't return expected text:\n{out.stdout[-1000:]}" + ) + assert "UA=" in out.stdout and "Firefox/150" in out.stdout, ( + "navigator.userAgent doesn't report Firefox/150 — UA spoofing " + f"regression?\n{out.stdout[-1000:]}" + ) + + +# ---------- meta: verify the test markers themselves work ------------------- # + + +@pytest.mark.e2e +def test_e2e_marker_is_excluded_by_default(): + """Sanity check on pyproject.toml's `addopts = '-m not e2e'` — this test + only runs when `-m e2e` is passed explicitly. 
If you're reading this in + a normal pytest run, the addopts filter is broken.""" + assert True