mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-12 09:12:40 +02:00
test(backend): enhance Drive file filtering and add unit tests for _drive_list_files
This commit is contained in:
parent
074b06441f
commit
8536bac29a
5 changed files with 139 additions and 24 deletions
|
|
@ -320,15 +320,54 @@ def _drive_list_files(args: dict[str, Any]) -> dict[str, Any]:
|
||||||
except IndexError:
|
except IndexError:
|
||||||
folder_id = "root"
|
folder_id = "root"
|
||||||
|
|
||||||
files = _DRIVE_FIXTURE.get(folder_id, [])
|
files = _filter_drive_files_for_query(q, _DRIVE_FIXTURE.get(folder_id, []))
|
||||||
return {
|
return {
|
||||||
"data": {
|
"data": {
|
||||||
"files": list(files),
|
"files": files,
|
||||||
"nextPageToken": None,
|
"nextPageToken": None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_quoted_value(q: str, anchor: str) -> str | None:
|
||||||
|
anchor_idx = q.find(anchor)
|
||||||
|
if anchor_idx == -1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
after_anchor = q[anchor_idx + len(anchor) :]
|
||||||
|
first_quote_idx = after_anchor.find("'")
|
||||||
|
if first_quote_idx == -1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
after_first_quote = after_anchor[first_quote_idx + 1 :]
|
||||||
|
second_quote_idx = after_first_quote.find("'")
|
||||||
|
if second_quote_idx == -1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return after_first_quote[:second_quote_idx]
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_drive_files_for_query(q: str, files: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Apply the subset of Drive query clauses this fake understands.

    Supported clauses (matched as plain substrings of ``q``):

    * ``trashed = false`` drops entries whose ``"trashed"`` value is ``True``.
    * ``mimeType != '<value>'`` drops entries with that MIME type. Every
      occurrence of the clause is honoured, so a query may exclude several
      MIME types at once.
    * ``mimeType = '<value>'`` keeps only entries with that MIME type; only
      the first such clause is considered.

    Args:
        q: The raw query string passed by the caller.
        files: Candidate file entries; the list is not mutated.

    Returns:
        A new list with the entries that survive every recognised clause,
        in their original order.
    """
    exclude_anchor = "mimeType !="

    # Collect the literal of every exclusion clause, not just the first one.
    excluded_mime_types: list[str] = []
    cursor = q.find(exclude_anchor)
    while cursor != -1:
        value = _extract_quoted_value(q[cursor:], exclude_anchor)
        if value:
            excluded_mime_types.append(value)
        cursor = q.find(exclude_anchor, cursor + len(exclude_anchor))

    included_mime_type = _extract_quoted_value(q, "mimeType =")
    hide_trashed = "trashed = false" in q

    filtered: list[dict[str, Any]] = []
    for entry in files:
        if hide_trashed and entry.get("trashed") is True:
            continue
        mime_type = entry.get("mimeType")
        if mime_type in excluded_mime_types:
            continue
        if included_mime_type and mime_type != included_mime_type:
            continue
        filtered.append(entry)

    return filtered
|
||||||
|
|
||||||
|
|
||||||
def _drive_download_file(args: dict[str, Any]) -> dict[str, Any]:
|
def _drive_download_file(args: dict[str, Any]) -> dict[str, Any]:
|
||||||
"""Mimic GOOGLEDRIVE_DOWNLOAD_FILE.
|
"""Mimic GOOGLEDRIVE_DOWNLOAD_FILE.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,21 @@
|
||||||
"mimeType": "text/csv",
|
"mimeType": "text/csv",
|
||||||
"modifiedTime": "2025-01-25T13:45:00.000Z",
|
"modifiedTime": "2025-01-25T13:45:00.000Z",
|
||||||
"createdTime": "2025-01-25T13:45:00.000Z"
|
"createdTime": "2025-01-25T13:45:00.000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "fake-shortcut-canary",
|
||||||
|
"name": "Shortcut to Canary",
|
||||||
|
"mimeType": "application/vnd.google-apps.shortcut",
|
||||||
|
"modifiedTime": "2025-02-10T12:00:00.000Z",
|
||||||
|
"createdTime": "2025-02-10T12:00:00.000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "fake-file-trashed",
|
||||||
|
"name": "trashed-e2e-note.txt",
|
||||||
|
"mimeType": "text/plain",
|
||||||
|
"modifiedTime": "2025-02-11T09:00:00.000Z",
|
||||||
|
"createdTime": "2025-02-11T09:00:00.000Z",
|
||||||
|
"trashed": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fake-folder-projects": [
|
"fake-folder-projects": [
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
from tests.e2e.fakes.composio_module import _drive_list_files
|
||||||
|
|
||||||
|
|
||||||
|
def _ids(result: dict) -> set[str]:
|
||||||
|
return {item["id"] for item in result["data"]["files"]}
|
||||||
|
|
||||||
|
|
||||||
|
def test_drive_list_files_filters_shortcuts_and_trashed_items():
    """A root listing that excludes shortcuts and trash keeps only live files."""
    query = (
        "'root' in parents and trashed = false and "
        "mimeType != 'application/vnd.google-apps.shortcut'"
    )

    listed = _ids(_drive_list_files({"q": query}))

    # The regular canary file survives both filters...
    assert "fake-file-canary" in listed
    # ...while the shortcut and the trashed entry are filtered out.
    assert "fake-shortcut-canary" not in listed
    assert "fake-file-trashed" not in listed
|
||||||
|
|
||||||
|
|
||||||
|
def test_drive_list_files_filters_to_exact_mime_type():
    """A ``mimeType = 'text/plain'`` clause narrows the root listing to one file."""
    query = "'root' in parents and trashed = false and mimeType = 'text/plain'"

    response = _drive_list_files({"q": query})

    assert _ids(response) == {"fake-file-canary"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_drive_list_files_uses_requested_parent_folder():
    """The folder named in the ``in parents`` clause selects which files are listed."""
    query = "'fake-folder-projects' in parents and trashed = false"

    response = _drive_list_files({"q": query})

    assert _ids(response) == {"fake-file-roadmap"}
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import { composioDriveTest as test, expect } from "../../../fixtures";
|
import { composioDriveTest as test, expect } from "../../../fixtures";
|
||||||
import { listConnectors, triggerIndex, updateConnectorConfig } from "../../../helpers/api/connectors";
|
import { listConnectors, triggerIndex, updateConnectorConfig } from "../../../helpers/api/connectors";
|
||||||
import { listDocuments } from "../../../helpers/api/documents";
|
import { getEditorContent, listDocuments } from "../../../helpers/api/documents";
|
||||||
import { CANARY_TOKENS, FAKE_DRIVE_FILES } from "../../../helpers/canary";
|
import { CANARY_TOKENS, FAKE_DRIVE_FILES } from "../../../helpers/canary";
|
||||||
import { openConnectorPopup } from "../../../helpers/ui/connector-popup";
|
import { openConnectorPopup } from "../../../helpers/ui/connector-popup";
|
||||||
import {
|
import {
|
||||||
|
|
@ -9,19 +9,11 @@ import {
|
||||||
} from "../../../helpers/waits/indexing";
|
} from "../../../helpers/waits/indexing";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Composio Drive user journey.
|
* Proves the Drive wiring from OAuth fixture -> selection persistence ->
|
||||||
|
* indexing -> stored source_markdown -> editor-content retrieval.
|
||||||
*
|
*
|
||||||
* User expectation:
|
* UI-driven file selection, chat retrieval, and LLM/embedding quality are
|
||||||
* "I connect Google Drive, choose the files/folders I care about,
|
* covered by later phases or narrower tests.
|
||||||
* wait for indexing, and then my Drive content is available in SurfSense."
|
|
||||||
*
|
|
||||||
* The OAuth connection is handled by the composioDriveConnector fixture so
|
|
||||||
* this test can focus on the user-visible expectation. The spec still touches
|
|
||||||
* the browser (dashboard + connector dialog) and then uses API helpers for
|
|
||||||
* selection/indexing to keep the expensive pipeline assertion deterministic.
|
|
||||||
*
|
|
||||||
* If this passes, the seam from Composio connection -> selection persistence ->
|
|
||||||
* Celery indexing -> document storage is wired correctly.
|
|
||||||
*/
|
*/
|
||||||
test.describe("Composio Drive journey", () => {
|
test.describe("Composio Drive journey", () => {
|
||||||
test(
|
test(
|
||||||
|
|
@ -33,11 +25,9 @@ test.describe("Composio Drive journey", () => {
|
||||||
waitUntil: "domcontentloaded",
|
waitUntil: "domcontentloaded",
|
||||||
});
|
});
|
||||||
await openConnectorPopup(page);
|
await openConnectorPopup(page);
|
||||||
await expect(
|
const connectorDialog = page.getByRole("dialog", { name: "Manage Connectors" });
|
||||||
page
|
await expect(connectorDialog).toBeVisible();
|
||||||
.getByRole("dialog", { name: "Manage Connectors" })
|
await expect(connectorDialog.getByRole("button", { name: "Manage" })).toBeVisible();
|
||||||
.getByText("Search your Drive files via Composio")
|
|
||||||
).toBeVisible();
|
|
||||||
|
|
||||||
await updateConnectorConfig(request, apiToken, composioDriveConnector.id, {
|
await updateConnectorConfig(request, apiToken, composioDriveConnector.id, {
|
||||||
...composioDriveConnector.config,
|
...composioDriveConnector.config,
|
||||||
|
|
@ -89,13 +79,17 @@ test.describe("Composio Drive journey", () => {
|
||||||
const canaryDoc = docs.find((d) => d.title === FAKE_DRIVE_FILES.canary.name);
|
const canaryDoc = docs.find((d) => d.title === FAKE_DRIVE_FILES.canary.name);
|
||||||
|
|
||||||
expect(canaryDoc, "canary document must exist after indexing").toBeDefined();
|
expect(canaryDoc, "canary document must exist after indexing").toBeDefined();
|
||||||
|
if (!canaryDoc) throw new Error("unreachable: canaryDoc asserted defined above");
|
||||||
|
|
||||||
const content = canaryDoc!.content ?? "";
|
// content holds the LLM summary; the raw file body lives in source_markdown.
|
||||||
|
// editor-content is the same endpoint the UI hits when opening a document.
|
||||||
|
const editor = await getEditorContent(request, apiToken, searchSpace.id, canaryDoc.id);
|
||||||
expect(
|
expect(
|
||||||
content,
|
editor.source_markdown,
|
||||||
`canary token ${CANARY_TOKENS.driveCanaryFile} should appear in Document.content; ` +
|
`canary token ${CANARY_TOKENS.driveCanaryFile} should appear in editor source_markdown; ` +
|
||||||
`got first 200 chars: ${content.slice(0, 200)}`
|
`got first 200 chars: ${editor.source_markdown.slice(0, 200)}`
|
||||||
).toContain(CANARY_TOKENS.driveCanaryFile);
|
).toContain(CANARY_TOKENS.driveCanaryFile);
|
||||||
|
expect(editor.chunk_count).toBeGreaterThan(0);
|
||||||
|
|
||||||
const refreshedConnectors = await listConnectors(request, apiToken, searchSpace.id);
|
const refreshedConnectors = await listConnectors(request, apiToken, searchSpace.id);
|
||||||
const refreshed = refreshedConnectors.find((c) => c.id === composioDriveConnector.id);
|
const refreshed = refreshedConnectors.find((c) => c.id === composioDriveConnector.id);
|
||||||
|
|
|
||||||
|
|
@ -38,3 +38,32 @@ export function isDocumentReady(doc: DocumentRow): boolean {
|
||||||
typeof doc.status === "string" ? doc.status : doc.status?.state;
|
typeof doc.status === "string" ? doc.status : doc.status?.state;
|
||||||
return state === "ready" || state === "READY";
|
return state === "ready" || state === "READY";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Shape of the JSON body that `getEditorContent` returns for a document.
 *
 * Property names are kept in snake_case because the body is used as-is,
 * without remapping.
 */
export type EditorContent = {
  /** Numeric identifier of the document. */
  document_id: number;
  /** Document title. */
  title: string;
  /** Document type label (NOTE(review): set of allowed values not visible here). */
  document_type: string;
  /** Markdown source of the document. */
  source_markdown: string;
  /** Content size in bytes (presumably of `source_markdown` — confirm against the backend). */
  content_size_bytes: number;
  /** Number of chunks reported for the document. */
  chunk_count: number;
  /** Truncation flag (presumably set when `source_markdown` was cut short — confirm against the backend). */
  truncated: boolean;
};
|
||||||
|
|
||||||
|
// Same endpoint the UI hits when a user opens a document in the dashboard.
|
||||||
|
/**
 * Fetch the editor-content payload of one document in a search space.
 *
 * Sends a GET to
 * `/api/v1/search-spaces/{searchSpaceId}/documents/{documentId}/editor-content`
 * on the backend, authenticated with the headers built from `token`.
 *
 * @param request - Request context used to issue the GET.
 * @param token - API token passed to `authHeaders`.
 * @param searchSpaceId - Search space that owns the document.
 * @param documentId - Document whose editor content is requested.
 * @returns The parsed response body.
 * @throws Error when the response status is not OK, or when the body is not a
 *   JSON object carrying a string `source_markdown`.
 */
export async function getEditorContent(
  request: APIRequestContext,
  token: string,
  searchSpaceId: number,
  documentId: number
): Promise<EditorContent> {
  const url = `${BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`;
  const response = await request.get(url, { headers: authHeaders(token) });

  if (!response.ok()) {
    throw new Error(
      `getEditorContent failed (${response.status()}): ${await response.text()}`
    );
  }

  // Check the shape at runtime instead of trusting a bare cast, so a malformed
  // body fails here with a readable message rather than later at a call site.
  const payload: unknown = await response.json();
  const hasMarkdown =
    typeof payload === "object" &&
    payload !== null &&
    typeof (payload as { source_markdown?: unknown }).source_markdown === "string";
  if (!hasMarkdown) {
    throw new Error(
      `getEditorContent returned an unexpected payload: ${JSON.stringify(payload).slice(0, 200)}`
    );
  }

  return payload as EditorContent;
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue