diff --git a/backend/src/lib/access.ts b/backend/src/lib/access.ts index 6f2c869..b9c5486 100644 --- a/backend/src/lib/access.ts +++ b/backend/src/lib/access.ts @@ -119,6 +119,47 @@ export async function ensureReviewAccess( return { ok: false }; } +/** + * Filter a list of document IDs down to those the caller is actually + * authorised to read — owners pass, plus any document whose `project_id` + * the caller has access to (own project or `shared_with` member). + * + * The tabular-review routes accept user-supplied `document_ids` from + * request bodies; without this filter an attacker who has any review of + * their own can plant arbitrary doc UUIDs and have the server fetch + run + * an LLM extraction over their bytes (CWE-639). + */ +export async function filterAccessibleDocumentIds( + documentIds: string[], + userId: string, + userEmail: string | null | undefined, + db: Db, +): Promise { + if (documentIds.length === 0) return []; + const { data: docs } = await db + .from("documents") + .select("id, user_id, project_id") + .in("id", documentIds); + const rows = (docs ?? []) as { + id: string; + user_id: string; + project_id: string | null; + }[]; + if (rows.length === 0) return []; + const accessibleProjectIds = new Set( + await listAccessibleProjectIds(userId, userEmail, db), + ); + const out: string[] = []; + for (const d of rows) { + if (d.user_id === userId) { + out.push(d.id); + } else if (d.project_id && accessibleProjectIds.has(d.project_id)) { + out.push(d.id); + } + } + return out; +} + /** * Returns the set of project IDs the user can access — own projects plus * any project where their email is in `shared_with`. Used to scope chat diff --git a/backend/src/routes/tabular.ts b/backend/src/routes/tabular.ts index 2b4f6db..79578b0 100644 --- a/backend/src/routes/tabular.ts +++ b/backend/src/routes/tabular.ts @@ -15,6 +15,7 @@ import { getUserApiKeys, getUserModelSettings } from "../lib/userSettings"; import { checkProjectAccess, ensureReviewAccess, + filterAccessibleDocumentIds, listAccessibleProjectIds, } from "../lib/access"; @@ -192,6 +193,18 @@ tabularRouter.post("/", requireAuth, async (req, res) => { if (!access.ok) return void res.status(404).json({ detail: "Project not found" }); } + // Drop any document_ids the caller can't access. Without this filter a + // user can stuff foreign UUIDs into document_ids, then call /generate + // or /regenerate-cell to read those documents' bytes back through the + // LLM (CWE-639). + const allowedDocumentIds = Array.isArray(document_ids) + ? await filterAccessibleDocumentIds( + document_ids, + userId, + userEmail, + db, + ) + : []; const { data: review, error } = await db .from("tabular_reviews") .insert({ @@ -208,7 +221,7 @@ tabularRouter.post("/", requireAuth, async (req, res) => { .status(500) .json({ detail: error?.message ?? "Failed to create review" }); - const cells = document_ids.flatMap((docId) => + const cells = allowedDocumentIds.flatMap((docId) => columns_config.map((col) => ({ review_id: review.id, document_id: docId, @@ -498,10 +511,27 @@ tabularRouter.patch("/:reviewId", requireAuth, async (req, res) => { if (Array.isArray(req.body.document_ids)) { // document_ids is the new source of truth — delete removed docs' cells - const newDocIds = req.body.document_ids as string[]; + const requestedDocIds = req.body.document_ids as string[]; const existingDocIds = (existingCells ?? []).map( (cell) => cell.document_id, ); + // Drop any newly-added doc_ids the caller can't read; preserve + // already-attached docs so a non-owner collaborator's PATCH + // doesn't accidentally orphan cells they can't directly access. + const existingDocIdSet = new Set(existingDocIds); + const newDocCandidates = requestedDocIds.filter( + (id) => !existingDocIdSet.has(id), + ); + const newDocAllowed = await filterAccessibleDocumentIds( + newDocCandidates, + userId, + userEmail, + db, + ); + const newDocAllowedSet = new Set(newDocAllowed); + const newDocIds = requestedDocIds.filter( + (id) => existingDocIdSet.has(id) || newDocAllowedSet.has(id), + ); const removedDocIds = existingDocIds.filter( (id) => !newDocIds.includes(id), ); @@ -657,6 +687,17 @@ tabularRouter.post( if (!column) return void res.status(400).json({ detail: "Column not found" }); + // Defense-in-depth — refuse to extract bytes for a document the + // caller can't read, even if a stale tabular_cells row points at it + // from before the access filter was added (CWE-639). + const docAllowed = await filterAccessibleDocumentIds( + [document_id], + userId, + userEmail, + db, + ); + if (docAllowed.length === 0) + return void res.status(404).json({ detail: "Document not found" }); const { data: doc } = await db .from("documents") .select("id, filename, file_type") @@ -763,12 +804,21 @@ tabularRouter.post("/:reviewId/generate", requireAuth, async (req, res) => { cellMap.set(`${cell.document_id}:${cell.column_index}`, cell); const docIds = [...new Set((cells ?? []).map((c) => c.document_id))]; + // Same defense-in-depth as /regenerate-cell — filter to docs the caller + // can actually read, so legacy cells planted before the access check + // can't be coerced into running an LLM extraction (CWE-639). + const allowedDocIds = new Set( + await filterAccessibleDocumentIds(docIds, userId, userEmail, db), + ); let docs: Record[] = []; if (docIds.length > 0) { - const { data } = await db - .from("documents") - .select("id, filename, file_type, page_count") - .in("id", docIds); + const filteredIds = docIds.filter((id) => allowedDocIds.has(id)); + const { data } = filteredIds.length > 0 + ? await db + .from("documents") + .select("id, filename, file_type, page_count") + .in("id", filteredIds) + : { data: [] as Record[] }; docs = data ?? []; } else if (review.project_id) { const { data } = await db