mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-12 01:02:39 +02:00
test(backend): enhance Drive file filtering and add unit tests for _drive_list_files
This commit is contained in:
parent
074b06441f
commit
8536bac29a
5 changed files with 139 additions and 24 deletions
|
|
@ -320,15 +320,54 @@ def _drive_list_files(args: dict[str, Any]) -> dict[str, Any]:
|
|||
except IndexError:
|
||||
folder_id = "root"
|
||||
|
||||
files = _DRIVE_FIXTURE.get(folder_id, [])
|
||||
files = _filter_drive_files_for_query(q, _DRIVE_FIXTURE.get(folder_id, []))
|
||||
return {
|
||||
"data": {
|
||||
"files": list(files),
|
||||
"files": files,
|
||||
"nextPageToken": None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _extract_quoted_value(q: str, anchor: str) -> str | None:
|
||||
anchor_idx = q.find(anchor)
|
||||
if anchor_idx == -1:
|
||||
return None
|
||||
|
||||
after_anchor = q[anchor_idx + len(anchor) :]
|
||||
first_quote_idx = after_anchor.find("'")
|
||||
if first_quote_idx == -1:
|
||||
return None
|
||||
|
||||
after_first_quote = after_anchor[first_quote_idx + 1 :]
|
||||
second_quote_idx = after_first_quote.find("'")
|
||||
if second_quote_idx == -1:
|
||||
return None
|
||||
|
||||
return after_first_quote[:second_quote_idx]
|
||||
|
||||
|
||||
def _filter_drive_files_for_query(q: str, files: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Apply the subset of query clauses this fake understands to ``files``.

    Recognised clauses:
      * ``trashed = false`` (literal substring) drops entries whose
        ``trashed`` field is exactly ``True``.
      * ``mimeType != '<value>'`` drops entries with that MIME type.
      * ``mimeType = '<value>'`` keeps only entries with that MIME type.

    Only the value returned by ``_extract_quoted_value`` for each anchor is
    honoured; an empty or missing value disables that clause.

    Args:
        q: Raw query string.
        files: Candidate file entries; the input list is not modified.

    Returns:
        A new list holding the entries that satisfy every active clause, in
        their original order.
    """
    hide_trashed = "trashed = false" in q
    rejected_mime = _extract_quoted_value(q, "mimeType !=")
    required_mime = _extract_quoted_value(q, "mimeType =")

    def _keep(entry: dict[str, Any]) -> bool:
        # Each clause is consulted only when active, so entries are left
        # untouched when the query carries no recognised filter.
        if hide_trashed and entry.get("trashed") is True:
            return False
        if rejected_mime and entry.get("mimeType") == rejected_mime:
            return False
        if required_mime and entry.get("mimeType") != required_mime:
            return False
        return True

    return [entry for entry in files if _keep(entry)]
|
||||
|
||||
|
||||
def _drive_download_file(args: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Mimic GOOGLEDRIVE_DOWNLOAD_FILE.
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,21 @@
|
|||
"mimeType": "text/csv",
|
||||
"modifiedTime": "2025-01-25T13:45:00.000Z",
|
||||
"createdTime": "2025-01-25T13:45:00.000Z"
|
||||
},
|
||||
{
|
||||
"id": "fake-shortcut-canary",
|
||||
"name": "Shortcut to Canary",
|
||||
"mimeType": "application/vnd.google-apps.shortcut",
|
||||
"modifiedTime": "2025-02-10T12:00:00.000Z",
|
||||
"createdTime": "2025-02-10T12:00:00.000Z"
|
||||
},
|
||||
{
|
||||
"id": "fake-file-trashed",
|
||||
"name": "trashed-e2e-note.txt",
|
||||
"mimeType": "text/plain",
|
||||
"modifiedTime": "2025-02-11T09:00:00.000Z",
|
||||
"createdTime": "2025-02-11T09:00:00.000Z",
|
||||
"trashed": true
|
||||
}
|
||||
],
|
||||
"fake-folder-projects": [
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
from tests.e2e.fakes.composio_module import _drive_list_files
|
||||
|
||||
|
||||
def _ids(result: dict) -> set[str]:
|
||||
return {item["id"] for item in result["data"]["files"]}
|
||||
|
||||
|
||||
def test_drive_list_files_filters_shortcuts_and_trashed_items():
    """Root listing omits the shortcut and the trashed file but keeps the canary."""
    query = (
        "'root' in parents and trashed = false and "
        "mimeType != 'application/vnd.google-apps.shortcut'"
    )

    listed = _ids(_drive_list_files({"q": query}))

    assert "fake-file-canary" in listed
    assert "fake-shortcut-canary" not in listed
    assert "fake-file-trashed" not in listed
|
||||
|
||||
|
||||
def test_drive_list_files_filters_to_exact_mime_type():
    """A ``mimeType = 'text/plain'`` clause leaves only the canary file in root."""
    query = "'root' in parents and trashed = false and mimeType = 'text/plain'"

    response = _drive_list_files({"q": query})

    assert _ids(response) == {"fake-file-canary"}
|
||||
|
||||
|
||||
def test_drive_list_files_uses_requested_parent_folder():
    """The parent folder named in the query selects which fixture listing is returned."""
    query = "'fake-folder-projects' in parents and trashed = false"

    response = _drive_list_files({"q": query})

    assert _ids(response) == {"fake-file-roadmap"}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
import { composioDriveTest as test, expect } from "../../../fixtures";
|
||||
import { listConnectors, triggerIndex, updateConnectorConfig } from "../../../helpers/api/connectors";
|
||||
import { listDocuments } from "../../../helpers/api/documents";
|
||||
import { getEditorContent, listDocuments } from "../../../helpers/api/documents";
|
||||
import { CANARY_TOKENS, FAKE_DRIVE_FILES } from "../../../helpers/canary";
|
||||
import { openConnectorPopup } from "../../../helpers/ui/connector-popup";
|
||||
import {
|
||||
|
|
@ -9,19 +9,11 @@ import {
|
|||
} from "../../../helpers/waits/indexing";
|
||||
|
||||
/**
|
||||
* Composio Drive user journey.
|
||||
* Proves the Drive wiring from OAuth fixture -> selection persistence ->
|
||||
* indexing -> stored source_markdown -> editor-content retrieval.
|
||||
*
|
||||
* User expectation:
|
||||
* "I connect Google Drive, choose the files/folders I care about,
|
||||
* wait for indexing, and then my Drive content is available in SurfSense."
|
||||
*
|
||||
* The OAuth connection is handled by the composioDriveConnector fixture so
|
||||
* this test can focus on the user-visible expectation. The spec still touches
|
||||
* the browser (dashboard + connector dialog) and then uses API helpers for
|
||||
* selection/indexing to keep the expensive pipeline assertion deterministic.
|
||||
*
|
||||
* If this passes, the seam from Composio connection -> selection persistence ->
|
||||
* Celery indexing -> document storage is wired correctly.
|
||||
* UI-driven file selection, chat retrieval, and LLM/embedding quality are
|
||||
* covered by later phases or narrower tests.
|
||||
*/
|
||||
test.describe("Composio Drive journey", () => {
|
||||
test(
|
||||
|
|
@ -33,11 +25,9 @@ test.describe("Composio Drive journey", () => {
|
|||
waitUntil: "domcontentloaded",
|
||||
});
|
||||
await openConnectorPopup(page);
|
||||
await expect(
|
||||
page
|
||||
.getByRole("dialog", { name: "Manage Connectors" })
|
||||
.getByText("Search your Drive files via Composio")
|
||||
).toBeVisible();
|
||||
const connectorDialog = page.getByRole("dialog", { name: "Manage Connectors" });
|
||||
await expect(connectorDialog).toBeVisible();
|
||||
await expect(connectorDialog.getByRole("button", { name: "Manage" })).toBeVisible();
|
||||
|
||||
await updateConnectorConfig(request, apiToken, composioDriveConnector.id, {
|
||||
...composioDriveConnector.config,
|
||||
|
|
@ -89,13 +79,17 @@ test.describe("Composio Drive journey", () => {
|
|||
const canaryDoc = docs.find((d) => d.title === FAKE_DRIVE_FILES.canary.name);
|
||||
|
||||
expect(canaryDoc, "canary document must exist after indexing").toBeDefined();
|
||||
if (!canaryDoc) throw new Error("unreachable: canaryDoc asserted defined above");
|
||||
|
||||
const content = canaryDoc!.content ?? "";
|
||||
// content holds the LLM summary; the raw file body lives in source_markdown.
|
||||
// editor-content is the same endpoint the UI hits when opening a document.
|
||||
const editor = await getEditorContent(request, apiToken, searchSpace.id, canaryDoc.id);
|
||||
expect(
|
||||
content,
|
||||
`canary token ${CANARY_TOKENS.driveCanaryFile} should appear in Document.content; ` +
|
||||
`got first 200 chars: ${content.slice(0, 200)}`
|
||||
editor.source_markdown,
|
||||
`canary token ${CANARY_TOKENS.driveCanaryFile} should appear in editor source_markdown; ` +
|
||||
`got first 200 chars: ${editor.source_markdown.slice(0, 200)}`
|
||||
).toContain(CANARY_TOKENS.driveCanaryFile);
|
||||
expect(editor.chunk_count).toBeGreaterThan(0);
|
||||
|
||||
const refreshedConnectors = await listConnectors(request, apiToken, searchSpace.id);
|
||||
const refreshed = refreshedConnectors.find((c) => c.id === composioDriveConnector.id);
|
||||
|
|
|
|||
|
|
@ -38,3 +38,32 @@ export function isDocumentReady(doc: DocumentRow): boolean {
|
|||
typeof doc.status === "string" ? doc.status : doc.status?.state;
|
||||
return state === "ready" || state === "READY";
|
||||
}
|
||||
|
||||
/**
 * Shape of the JSON body returned by the document `editor-content` endpoint,
 * as consumed by these test helpers.
 */
export type EditorContent = {
  // Identity of the document.
  document_id: number;
  document_type: string;
  title: string;

  /** Raw file body of the document (as opposed to the summarised `content`). */
  source_markdown: string;

  // Size/shape metadata reported alongside the body.
  // NOTE(review): exact semantics of `truncated` are not visible here — confirm against the backend schema.
  chunk_count: number;
  content_size_bytes: number;
  truncated: boolean;
};
|
||||
|
||||
/**
 * Fetches the editor-content payload for a single document. This is the same
 * endpoint the UI hits when a user opens a document in the dashboard.
 *
 * @param request - API request context used to issue the GET.
 * @param token - Auth token passed to `authHeaders` to build the request headers.
 * @param searchSpaceId - Search space that owns the document.
 * @param documentId - Document whose editor content is requested.
 * @returns The parsed JSON response body, typed as {@link EditorContent}
 *   (the shape is asserted, not validated at runtime).
 * @throws Error when the response is not OK; the message carries the HTTP
 *   status and the response body text.
 */
export async function getEditorContent(
  request: APIRequestContext,
  token: string,
  searchSpaceId: number,
  documentId: number
): Promise<EditorContent> {
  const url = `${BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`;
  const response = await request.get(url, { headers: authHeaders(token) });

  if (response.ok()) {
    return (await response.json()) as EditorContent;
  }

  // Surface both status and body so a failing spec shows why the backend refused.
  const status = response.status();
  const body = await response.text();
  throw new Error(`getEditorContent failed (${status}): ${body}`);
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue