From ae0caad292e0e596d5105edfdd7a141046a57b5f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 6 May 2026 17:21:40 +0530 Subject: [PATCH] test(web): add shared Playwright E2E helpers and search-space fixture --- surfsense_web/tests/README.md | 113 ++++++++++ .../tests/fixtures/search-space.fixture.ts | 51 +++++ surfsense_web/tests/helpers/api/auth.ts | 55 +++++ surfsense_web/tests/helpers/api/connectors.ts | 193 ++++++++++++++++++ surfsense_web/tests/helpers/api/documents.ts | 40 ++++ .../tests/helpers/api/search-spaces.ts | 42 ++++ surfsense_web/tests/helpers/canary.ts | 49 +++++ .../tests/helpers/ui/connector-popup.ts | 34 +++ surfsense_web/tests/helpers/ui/dashboard.ts | 21 ++ surfsense_web/tests/helpers/waits/indexing.ts | 75 +++++++ 10 files changed, 673 insertions(+) create mode 100644 surfsense_web/tests/README.md create mode 100644 surfsense_web/tests/fixtures/search-space.fixture.ts create mode 100644 surfsense_web/tests/helpers/api/auth.ts create mode 100644 surfsense_web/tests/helpers/api/connectors.ts create mode 100644 surfsense_web/tests/helpers/api/documents.ts create mode 100644 surfsense_web/tests/helpers/api/search-spaces.ts create mode 100644 surfsense_web/tests/helpers/canary.ts create mode 100644 surfsense_web/tests/helpers/ui/connector-popup.ts create mode 100644 surfsense_web/tests/helpers/ui/dashboard.ts create mode 100644 surfsense_web/tests/helpers/waits/indexing.ts diff --git a/surfsense_web/tests/README.md b/surfsense_web/tests/README.md new file mode 100644 index 000000000..51fd35050 --- /dev/null +++ b/surfsense_web/tests/README.md @@ -0,0 +1,113 @@ +# Playwright E2E Suite + +End-to-end tests for the full SurfSense stack (Next.js + FastAPI + +Celery + Postgres + Redis). Designed to scale from one connector +(Composio Drive in Phase 1) to every connector + manual file upload +without rewriting the harness. 
+ +## Layout + +``` +tests/ +├── auth.setup.ts # one-time login, persists localStorage +├── smoke/ # tracer-bullet tests (dashboard renders) +├── connectors/ +│ └── composio/ +│ └── drive/ # Composio Google Drive — Phase 1 +│ └── journey.spec.ts # connect -> select -> index -> canary assertion +├── fixtures/ # test.extend() fixtures +│ ├── index.ts # named `test` exports per spec category +│ ├── search-space.fixture.ts # apiToken + per-test search space +│ └── connectors/ +│ └── composio-drive.fixture.ts +├── helpers/ # reusable building blocks +│ ├── api/ # backend HTTP helpers +│ ├── ui/ # page-object selectors +│ ├── waits/ # deterministic polling +│ └── canary.ts # canary tokens + fixed Drive file ids +└── README.md # this file +``` + +## How the deterministic harness works + +There are **three layers of defense** against accidental real-world +calls. None of them touch production code. + +1. `surfsense_backend/tests/e2e/run_backend.py` and `run_celery.py` are + separate entrypoints (not used by `python main.py`). They hijack + `sys.modules["composio"]` BEFORE importing the app, swap in strict + fakes for `langchain_litellm`/`langchain_openai`, and mount the + `X-E2E-Scenario` middleware. +2. The fakes themselves are **strict**: every class implements + `__getattr__` that raises `NotImplementedError` on unknown surface. + Adding a new SDK call site without updating the fake fails CI loudly. +3. CI sets `HTTPS_PROXY=http://127.0.0.1:1` plus sentinel API keys + (`COMPOSIO_API_KEY=e2e-deny-real-call-sentinel`). Any leaked outbound + HTTP call fails before reaching the network. + +## Running locally + +```bash +# 1. Bring up Postgres + Redis (Docker compose, supabase, whatever you use) +docker compose up -d postgres redis + +# 2. Backend with E2E entrypoint (note: NOT `uv run main.py`) +cd surfsense_backend +uv run alembic upgrade head +uv run python tests/e2e/run_backend.py & + +# 3. 
Celery worker with the same entrypoint pattern +uv run python tests/e2e/run_celery.py & + +# 4. Run Playwright tests (auto-starts `pnpm dev` via webServer config) +cd ../surfsense_web +pnpm test:e2e +``` + +For CI behavior in one go: `pnpm test:e2e:headless`. + +To debug the Drive journey: `pnpm test:e2e -- connectors/composio/drive/journey.spec.ts --headed`. + +## Adding a new connector + +The directory tree is designed so a new connector lives mostly inside +its own folder. E2E is scoped to **one user expectation per connector**: +the smallest browser journey that proves the user-visible outcome works. +Follow this checklist: + +1. **Backend fake.** Add a new file under + `surfsense_backend/tests/e2e/fakes/{connector}_module.py` mirroring + `composio_module.py`. Use `__getattr__` to raise on unknown surface. +2. **Hijack.** Wire the new module into `run_backend.py` and + `run_celery.py` with `sys.modules["{sdk_module}"] = {fake_module}`. +3. **Backend tests.** Put edge cases in backend tests, not Playwright: + OAuth state validation in unit tests, and route/error branches in + `surfsense_backend/tests/integration/{connector}/`. +4. **Fixtures.** Drop a fixture file into `tests/fixtures/connectors/` + that returns a pre-connected connector row. +5. **Journey spec.** Create exactly one + `tests/connectors/{provider}/{connector}/journey.spec.ts` for the user + expectation. For indexable connectors this usually means + connect -> select scope -> index -> assert canary content. For + connection-only connectors this means connect -> assert connected badge. +6. **Update this README's directory diagram.** + +Do not add separate Playwright specs for expired OAuth state, duplicate +connectors, auth-expired classification, or route config persistence. +Those belong in backend unit/integration tests such as +`surfsense_backend/tests/unit/utils/test_oauth_security.py` and +`surfsense_backend/tests/integration/composio/`. + +## Why API-driven? 
+ +Journey specs prefer a thin browser assertion followed by API-driven +configuration/indexing because: + +- It keeps tests **deterministic** (no waiting on UI animation, + React hydration, or Next.js compile time). +- It exercises the **same backend code path** the UI eventually calls. +- The expensive E2E assertion stays focused on what only E2E can prove: + the cross-process seam from connector -> Celery -> indexing -> DB. + +UI-only tests live under `helpers/ui/` for future Phase 2 work +(folder-tree drag-and-drop, indexing options switches, etc.). diff --git a/surfsense_web/tests/fixtures/search-space.fixture.ts b/surfsense_web/tests/fixtures/search-space.fixture.ts new file mode 100644 index 000000000..7773aa60c --- /dev/null +++ b/surfsense_web/tests/fixtures/search-space.fixture.ts @@ -0,0 +1,51 @@ +import { test as base } from "@playwright/test"; +import { loginAsTestUser } from "../helpers/api/auth"; +import { uniqueSearchSpaceName } from "../helpers/canary"; +import { + createSearchSpace, + deleteSearchSpace, + type SearchSpaceRow, +} from "../helpers/api/search-spaces"; + +export type SearchSpaceFixtures = { + /** + * Bearer token for the seeded test user. Worker-scoped so we only + * log in once per worker (logins are cheap, but caching is cheaper). + */ + apiToken: string; + /** + * A fresh, named search space for the current test. Cleaned up + * automatically after the test finishes. 
+ */ + searchSpace: SearchSpaceRow; +}; + +export const searchSpaceFixtures = base.extend({ + apiTokenWorker: [ + async ({ playwright }, use) => { + const ctx = await playwright.request.newContext(); + try { + const token = await loginAsTestUser(ctx); + await use(token); + } finally { + await ctx.dispose(); + } + }, + { scope: "worker" }, + ], + apiToken: async ({ apiTokenWorker }, use) => { + await use(apiTokenWorker); + }, + searchSpace: async ({ request, apiToken }, use) => { + const space = await createSearchSpace( + request, + apiToken, + uniqueSearchSpaceName("composio-drive-e2e") + ); + try { + await use(space); + } finally { + await deleteSearchSpace(request, apiToken, space.id); + } + }, +}); diff --git a/surfsense_web/tests/helpers/api/auth.ts b/surfsense_web/tests/helpers/api/auth.ts new file mode 100644 index 000000000..f7bd697fd --- /dev/null +++ b/surfsense_web/tests/helpers/api/auth.ts @@ -0,0 +1,55 @@ +import type { APIRequestContext } from "@playwright/test"; + +/** + * Direct backend auth helper. Uses the same /auth/jwt/login endpoint the + * UI uses; mirrors lib/apis/auth-api.service.ts. + * + * Returns a bearer token specs can attach to API calls when they don't + * want to go through the browser. The browser-side auth (localStorage) + * is set up separately by tests/auth.setup.ts. 
+ */ + +export const BACKEND_URL = + process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + +const TEST_USER_EMAIL = process.env.PLAYWRIGHT_TEST_EMAIL || "test@surfsense.net"; +const TEST_USER_PASSWORD = process.env.PLAYWRIGHT_TEST_PASSWORD || "TestPassword123!"; + +export async function loginAsTestUser(request: APIRequestContext): Promise { + const response = await request.post(`${BACKEND_URL}/auth/jwt/login`, { + form: { + username: TEST_USER_EMAIL, + password: TEST_USER_PASSWORD, + grant_type: "password", + }, + headers: { "Content-Type": "application/x-www-form-urlencoded" }, + }); + + if (!response.ok()) { + throw new Error( + `Login to ${BACKEND_URL}/auth/jwt/login failed (${response.status()}): ${await response.text()}` + ); + } + + const { access_token } = (await response.json()) as { access_token: string }; + if (!access_token) { + throw new Error("Backend response missing access_token"); + } + return access_token; +} + +/** + * Standard auth headers for backend API calls. Optionally injects an + * X-E2E-Scenario header that the test-only ScenarioMiddleware in + * surfsense_backend/tests/e2e/run_backend.py reads to flip fake behavior. + */ +export function authHeaders( + token: string, + extra?: Record +): Record { + return { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", + ...(extra ?? 
{}), + }; +} diff --git a/surfsense_web/tests/helpers/api/connectors.ts b/surfsense_web/tests/helpers/api/connectors.ts new file mode 100644 index 000000000..3e270f2dc --- /dev/null +++ b/surfsense_web/tests/helpers/api/connectors.ts @@ -0,0 +1,193 @@ +import type { APIRequestContext } from "@playwright/test"; +import { authHeaders, BACKEND_URL } from "./auth"; + +export type ConnectorRow = { + id: number; + name: string; + connector_type: string; + config: Record; + last_indexed_at: string | null; + is_indexable: boolean; +}; + +export async function listConnectors( + request: APIRequestContext, + token: string, + searchSpaceId: number +): Promise { + const response = await request.get( + `${BACKEND_URL}/api/v1/search-source-connectors?search_space_id=${searchSpaceId}`, + { headers: authHeaders(token) } + ); + if (!response.ok()) { + throw new Error( + `listConnectors failed (${response.status()}): ${await response.text()}` + ); + } + const data = await response.json(); + return Array.isArray(data) ? data : (data?.items ?? 
[]); +} + +export async function getConnector( + request: APIRequestContext, + token: string, + connectorId: number +): Promise { + const response = await request.get( + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + { headers: authHeaders(token) } + ); + if (!response.ok()) { + throw new Error( + `getConnector(${connectorId}) failed (${response.status()}): ${await response.text()}` + ); + } + return (await response.json()) as ConnectorRow; +} + +export async function updateConnectorConfig( + request: APIRequestContext, + token: string, + connectorId: number, + config: Record +): Promise { + const response = await request.put( + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + { headers: authHeaders(token), data: { config } } + ); + if (!response.ok()) { + throw new Error( + `updateConnectorConfig(${connectorId}) failed (${response.status()}): ${await response.text()}` + ); + } + return (await response.json()) as ConnectorRow; +} + +export async function deleteConnector( + request: APIRequestContext, + token: string, + connectorId: number +): Promise { + const response = await request.delete( + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + { headers: authHeaders(token) } + ); + if (!response.ok() && response.status() !== 404) { + throw new Error( + `deleteConnector(${connectorId}) failed (${response.status()}): ${await response.text()}` + ); + } +} + +export async function listComposioDriveFolders( + request: APIRequestContext, + token: string, + connectorId: number, + parentId?: string +): Promise<{ items: Array> }> { + const url = parentId + ? 
`${BACKEND_URL}/api/v1/connectors/${connectorId}/composio-drive/folders?parent_id=${encodeURIComponent(parentId)}` + : `${BACKEND_URL}/api/v1/connectors/${connectorId}/composio-drive/folders`; + const response = await request.get(url, { + headers: authHeaders(token), + }); + if (!response.ok()) { + throw new Error( + `listComposioDriveFolders(${connectorId}) failed (${response.status()}): ${await response.text()}` + ); + } + return (await response.json()) as { items: Array> }; +} + +export type IndexBody = { + folders?: Array<{ id: string; name: string; mimeType: string }>; + files?: Array<{ id: string; name: string; mimeType: string }>; + indexing_options?: { + max_files_per_folder?: number; + incremental_sync?: boolean; + include_subfolders?: boolean; + }; +}; + +export async function triggerIndex( + request: APIRequestContext, + token: string, + connectorId: number, + searchSpaceId: number, + body: IndexBody +): Promise<{ ok: true }> { + const response = await request.post( + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}/index?search_space_id=${searchSpaceId}`, + { headers: authHeaders(token), data: body } + ); + if (!response.ok()) { + throw new Error( + `triggerIndex(${connectorId}) failed (${response.status()}): ${await response.text()}` + ); + } + return { ok: true }; +} + +/** + * Drives the OAuth flow for a Composio toolkit programmatically. + * + * Steps mirror what the UI does (see use-connector-dialog.ts): + * 1) GET /api/v1/auth/composio/connector/add?space_id=&toolkit_id= -> { auth_url } + * 2) Follow the auth_url (which the E2E fake makes same-origin so it + * lands on the callback directly with ?connectedAccountId=...). + * 3) Backend creates the connector and redirects to the frontend + * success page. + * + * Returns the newly-created (or reconnected) connector row. 
+ */ +export async function runComposioOAuth( + request: APIRequestContext, + token: string, + searchSpaceId: number, + toolkitId: "googledrive" | "gmail" | "googlecalendar" = "googledrive" +): Promise<{ + authUrl: string; + finalUrl: string; + connector: ConnectorRow | null; +}> { + // Step 1: kick off OAuth, get auth_url. + const initiateResp = await request.get( + `${BACKEND_URL}/api/v1/auth/composio/connector/add?space_id=${searchSpaceId}&toolkit_id=${toolkitId}`, + { + headers: authHeaders(token), + } + ); + if (!initiateResp.ok()) { + throw new Error( + `composio initiate failed (${initiateResp.status()}): ${await initiateResp.text()}` + ); + } + const { auth_url } = (await initiateResp.json()) as { auth_url: string }; + if (!auth_url) { + throw new Error("composio initiate response missing auth_url"); + } + + // Step 2: follow the auth_url. The fake makes this same-origin and + // pointing at the callback. Use maxRedirects=0 so we can inspect + // the final redirect target manually. + const callbackResp = await request.get(auth_url, { + headers: authHeaders(token), + maxRedirects: 0, + failOnStatusCode: false, + }); + + const location = callbackResp.headers().location ?? auth_url; + + // Step 3: look up the resulting connector (if any). + const connectors = await listConnectors(request, token, searchSpaceId); + const composioType = + toolkitId === "googledrive" + ? "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" + : toolkitId === "gmail" + ? "COMPOSIO_GMAIL_CONNECTOR" + : "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"; + const connector = + connectors.find((c) => c.connector_type === composioType) ?? 
null; + + return { authUrl: auth_url, finalUrl: location, connector }; +} diff --git a/surfsense_web/tests/helpers/api/documents.ts b/surfsense_web/tests/helpers/api/documents.ts new file mode 100644 index 000000000..2276327bf --- /dev/null +++ b/surfsense_web/tests/helpers/api/documents.ts @@ -0,0 +1,40 @@ +import type { APIRequestContext } from "@playwright/test"; +import { authHeaders, BACKEND_URL } from "./auth"; + +export type DocumentRow = { + id: number; + title: string; + content: string; + document_type: string; + status: { state?: string } | string; +}; + +type Paginated = { + items?: T[]; + total?: number; +}; + +export async function listDocuments( + request: APIRequestContext, + token: string, + searchSpaceId: number, + limit = 100 +): Promise { + const response = await request.get( + `${BACKEND_URL}/api/v1/documents?search_space_id=${searchSpaceId}&limit=${limit}`, + { headers: authHeaders(token) } + ); + if (!response.ok()) { + throw new Error( + `listDocuments failed (${response.status()}): ${await response.text()}` + ); + } + const body = (await response.json()) as Paginated | DocumentRow[]; + return Array.isArray(body) ? body : (body.items ?? []); +} + +export function isDocumentReady(doc: DocumentRow): boolean { + const state = + typeof doc.status === "string" ? 
doc.status : doc.status?.state; + return state === "ready" || state === "READY"; +} diff --git a/surfsense_web/tests/helpers/api/search-spaces.ts b/surfsense_web/tests/helpers/api/search-spaces.ts new file mode 100644 index 000000000..e9b884b72 --- /dev/null +++ b/surfsense_web/tests/helpers/api/search-spaces.ts @@ -0,0 +1,42 @@ +import type { APIRequestContext } from "@playwright/test"; +import { authHeaders, BACKEND_URL } from "./auth"; + +export type SearchSpaceRow = { + id: number; + name: string; + description: string | null; +}; + +export async function createSearchSpace( + request: APIRequestContext, + token: string, + name: string, + description = "E2E test search space" +): Promise { + const response = await request.post(`${BACKEND_URL}/api/v1/searchspaces`, { + headers: authHeaders(token), + data: { name, description }, + }); + if (!response.ok()) { + throw new Error( + `createSearchSpace failed (${response.status()}): ${await response.text()}` + ); + } + return (await response.json()) as SearchSpaceRow; +} + +export async function deleteSearchSpace( + request: APIRequestContext, + token: string, + id: number +): Promise { + const response = await request.delete(`${BACKEND_URL}/api/v1/searchspaces/${id}`, { + headers: authHeaders(token), + }); + if (!response.ok() && response.status() !== 404) { + // 404 is acceptable: the test may have already deleted the space. + throw new Error( + `deleteSearchSpace(${id}) failed (${response.status()}): ${await response.text()}` + ); + } +} diff --git a/surfsense_web/tests/helpers/canary.ts b/surfsense_web/tests/helpers/canary.ts new file mode 100644 index 000000000..8033065c8 --- /dev/null +++ b/surfsense_web/tests/helpers/canary.ts @@ -0,0 +1,49 @@ +import { randomUUID } from "node:crypto"; + +/** + * Canary tokens & deterministic test data. + * + * Embedded by the backend Composio fake into fake Drive file contents + * (see surfsense_backend/tests/e2e/fakes/fixtures/drive_files.json). 
+ * Specs assert these strings appear in the resulting Document rows to + * prove the indexing pipeline ran end-to-end. + * + * Each token is a stable string keyed by file id so multi-test runs + * remain deterministic and the resulting Document.content is greppable + * in failure traces. + */ +export const CANARY_TOKENS = { + driveCanaryFile: "SURFSENSE_E2E_CANARY_TOKEN_DRIVE_001", + driveReadme: "SURFSENSE_E2E_README_MARKER", + driveBudget: "SURFSENSE_E2E_BUDGET_MARKER", + driveRoadmap: "SURFSENSE_E2E_ROADMAP_MARKER", + driveArchive: "SURFSENSE_E2E_ARCHIVE_MARKER", +} as const; + +/** + * Fake Drive file IDs that match what the backend fake returns from + * GOOGLEDRIVE_LIST_FILES. Keep in sync with drive_files.json. + */ +export const FAKE_DRIVE_FILES = { + canary: { id: "fake-file-canary", name: "e2e-canary.txt", mimeType: "text/plain" }, + readme: { id: "fake-file-readme", name: "README.md", mimeType: "text/markdown" }, + budget: { id: "fake-file-budget", name: "Q1-Budget.csv", mimeType: "text/csv" }, +} as const; + +export const FAKE_DRIVE_FOLDERS = { + projects: { + id: "fake-folder-projects", + name: "Projects", + mimeType: "application/vnd.google-apps.folder", + }, + archive: { + id: "fake-folder-archive", + name: "Archive", + mimeType: "application/vnd.google-apps.folder", + }, +} as const; + +/** Generate a unique-per-run search space name. Keeps parallel tests isolated. */ +export function uniqueSearchSpaceName(prefix = "e2e"): string { + return `${prefix}-${randomUUID().slice(0, 8)}`; +} diff --git a/surfsense_web/tests/helpers/ui/connector-popup.ts b/surfsense_web/tests/helpers/ui/connector-popup.ts new file mode 100644 index 000000000..19e271064 --- /dev/null +++ b/surfsense_web/tests/helpers/ui/connector-popup.ts @@ -0,0 +1,34 @@ +import type { Page } from "@playwright/test"; +import { expect } from "@playwright/test"; + +/** + * Page-object-style helpers for the connector dialog rendered by + * components/assistant-ui/connector-popup.tsx. 
+ * + * Kept minimal in Phase 1: most spec interactions go through API + * fixtures for determinism. UI-driven coverage of every connector card + * is a Phase 2 task and will use this helper as the entry point. + */ + +export async function openConnectorPopup(page: Page): Promise { + // Label depends on whether the user already has connectors. + const trigger = page + .getByRole("button", { name: "Manage connectors" }) + .or(page.getByRole("button", { name: "Connect your connectors" })) + .first(); + + // Long timeout absorbs Next.js dev cold-compile of the new-chat route. + await expect(trigger).toBeVisible({ timeout: 60_000 }); + await trigger.click(); + + await expect(page.getByRole("dialog", { name: "Manage Connectors" })).toBeVisible(); +} + +export async function clickComposioDriveCard(page: Page): Promise { + const composioDriveCard = page.getByText("Search your Drive files via Composio"); + await composioDriveCard.scrollIntoViewIfNeeded(); + const card = composioDriveCard + .locator("xpath=ancestor::*[self::article or self::div][1]") + .first(); + await card.getByRole("button", { name: "Connect" }).click(); +} diff --git a/surfsense_web/tests/helpers/ui/dashboard.ts b/surfsense_web/tests/helpers/ui/dashboard.ts new file mode 100644 index 000000000..df4c67823 --- /dev/null +++ b/surfsense_web/tests/helpers/ui/dashboard.ts @@ -0,0 +1,21 @@ +import type { Page } from "@playwright/test"; +import { expect } from "@playwright/test"; + +/** + * Navigation helpers for dashboard routes. Centralized so that future + * route changes only require an update in one place. 
+ */ + +export function newChatUrl(searchSpaceId: number): string { + return `/dashboard/${searchSpaceId}/new-chat`; +} + +export function connectorsCallbackUrl(searchSpaceId: number): string { + return `/dashboard/${searchSpaceId}/connectors/callback`; +} + +export async function gotoNewChat(page: Page, searchSpaceId: number): Promise { + const target = newChatUrl(searchSpaceId); + await page.goto(target, { waitUntil: "domcontentloaded" }); + await expect(page).toHaveURL((url) => url.pathname === target); +} diff --git a/surfsense_web/tests/helpers/waits/indexing.ts b/surfsense_web/tests/helpers/waits/indexing.ts new file mode 100644 index 000000000..d60ab38e0 --- /dev/null +++ b/surfsense_web/tests/helpers/waits/indexing.ts @@ -0,0 +1,75 @@ +import type { APIRequestContext } from "@playwright/test"; +import { getConnector } from "../api/connectors"; +import { isDocumentReady, listDocuments } from "../api/documents"; + +/** + * Polls the backend until a connector finishes indexing OR the deadline + * passes. Replaces `waitForTimeout` (which is a Playwright anti-pattern) + * with deterministic polling on real signals. + */ +export async function waitForIndexingComplete( + request: APIRequestContext, + token: string, + connectorId: number, + searchSpaceId: number, + options: { timeoutMs?: number; intervalMs?: number; minDocuments?: number } = {} +): Promise { + const timeoutMs = options.timeoutMs ?? 60_000; + const intervalMs = options.intervalMs ?? 1_000; + const minDocuments = options.minDocuments ?? 
1; + + const startedAt = Date.now(); + let lastState = "unknown"; + + while (Date.now() - startedAt < timeoutMs) { + const connector = await getConnector(request, token, connectorId); + const docs = await listDocuments(request, token, searchSpaceId); + const readyDocs = docs.filter(isDocumentReady); + + const connectorIndexed = connector.last_indexed_at !== null; + const enoughReady = readyDocs.length >= minDocuments; + + if (connectorIndexed && enoughReady) { + return; + } + + lastState = `last_indexed_at=${connector.last_indexed_at} ready_docs=${readyDocs.length}/${minDocuments}`; + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + + throw new Error( + `waitForIndexingComplete: timed out after ${timeoutMs}ms waiting for ` + + `connector ${connectorId} in space ${searchSpaceId}. Last observed: ${lastState}` + ); +} + +/** + * Polls until the given document title appears in the search space with + * status=ready. Useful when a spec wants to assert on a specific file + * by name rather than count. + */ +export async function waitForDocumentByTitle( + request: APIRequestContext, + token: string, + searchSpaceId: number, + title: string, + options: { timeoutMs?: number; intervalMs?: number } = {} +): Promise { + const timeoutMs = options.timeoutMs ?? 60_000; + const intervalMs = options.intervalMs ?? 1_000; + const startedAt = Date.now(); + + while (Date.now() - startedAt < timeoutMs) { + const docs = await listDocuments(request, token, searchSpaceId); + const match = docs.find((d) => d.title === title && isDocumentReady(d)); + if (match) { + return; + } + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + + throw new Error( + `waitForDocumentByTitle: timed out after ${timeoutMs}ms waiting for ` + + `title=${JSON.stringify(title)} in space ${searchSpaceId}.` + ); +}