fix(security): scope tabular-review document_ids by access (CWE-639)

The tabular-review routes accept user-supplied document_ids in
request bodies (POST /tabular-review, PATCH /:reviewId) and stale
cell rows on byte-fetching paths (POST /:reviewId/regenerate-cell,
POST /:reviewId/generate). None of those paths checked whether the
caller can read those documents — a free-account attacker could plant
foreign UUIDs into their own review and have the server fetch the
bytes from R2 + run an LLM extraction over them, returning verbatim
text via the standard review GET.

Adds filterAccessibleDocumentIds(documentIds, userId, userEmail, db)
next to the existing access helpers (owner-of-doc OR project member),
and applies it at the four entry points:

- POST /tabular-review               drop unauthorised on insert
- PATCH /:reviewId                   drop newly-added unauthorised; keep
                                     already-attached cells so non-owner
                                     collaborators don't accidentally
                                     orphan rows they can't directly
                                     access
- POST /:reviewId/regenerate-cell    refuse byte fetch when caller has
                                     no access to the underlying doc
- POST /:reviewId/generate           filter docIds before parallel LLM
                                     fetch (defense-in-depth for legacy
                                     cells planted before this fix)

Fails closed silently rather than 403'ing so legacy clients that pass
stale ids don't error out the whole review.

Detected by Aeon + manual review.
Severity: high
CWE-639 (Authorization Bypass Through User-Controlled Key)
This commit is contained in:
Aeon (aaronjmars) 2026-05-10 04:50:21 +00:00
parent f40c25d07f
commit e261d2e4bd
2 changed files with 97 additions and 6 deletions

View file

@ -119,6 +119,47 @@ export async function ensureReviewAccess(
return { ok: false };
}
/**
* Filter a list of document IDs down to those the caller is actually
* authorised to read owners pass, plus any document whose `project_id`
* the caller has access to (own project or `shared_with` member).
*
* The tabular-review routes accept user-supplied `document_ids` from
* request bodies; without this filter an attacker who has any review of
* their own can plant arbitrary doc UUIDs and have the server fetch + run
* an LLM extraction over their bytes (CWE-639).
*/
export async function filterAccessibleDocumentIds(
documentIds: string[],
userId: string,
userEmail: string | null | undefined,
db: Db,
): Promise<string[]> {
if (documentIds.length === 0) return [];
const { data: docs } = await db
.from("documents")
.select("id, user_id, project_id")
.in("id", documentIds);
const rows = (docs ?? []) as {
id: string;
user_id: string;
project_id: string | null;
}[];
if (rows.length === 0) return [];
const accessibleProjectIds = new Set(
await listAccessibleProjectIds(userId, userEmail, db),
);
const out: string[] = [];
for (const d of rows) {
if (d.user_id === userId) {
out.push(d.id);
} else if (d.project_id && accessibleProjectIds.has(d.project_id)) {
out.push(d.id);
}
}
return out;
}
/**
* Returns the set of project IDs the user can access own projects plus
* any project where their email is in `shared_with`. Used to scope chat

View file

@ -15,6 +15,7 @@ import { getUserApiKeys, getUserModelSettings } from "../lib/userSettings";
import {
checkProjectAccess,
ensureReviewAccess,
filterAccessibleDocumentIds,
listAccessibleProjectIds,
} from "../lib/access";
@ -192,6 +193,18 @@ tabularRouter.post("/", requireAuth, async (req, res) => {
if (!access.ok)
return void res.status(404).json({ detail: "Project not found" });
}
// Drop any document_ids the caller can't access. Without this filter a
// user can stuff foreign UUIDs into document_ids, then call /generate
// or /regenerate-cell to read those documents' bytes back through the
// LLM (CWE-639).
const allowedDocumentIds = Array.isArray(document_ids)
? await filterAccessibleDocumentIds(
document_ids,
userId,
userEmail,
db,
)
: [];
const { data: review, error } = await db
.from("tabular_reviews")
.insert({
@ -208,7 +221,7 @@ tabularRouter.post("/", requireAuth, async (req, res) => {
.status(500)
.json({ detail: error?.message ?? "Failed to create review" });
const cells = document_ids.flatMap((docId) =>
const cells = allowedDocumentIds.flatMap((docId) =>
columns_config.map((col) => ({
review_id: review.id,
document_id: docId,
@ -498,10 +511,27 @@ tabularRouter.patch("/:reviewId", requireAuth, async (req, res) => {
if (Array.isArray(req.body.document_ids)) {
// document_ids is the new source of truth — delete removed docs' cells
const newDocIds = req.body.document_ids as string[];
const requestedDocIds = req.body.document_ids as string[];
const existingDocIds = (existingCells ?? []).map(
(cell) => cell.document_id,
);
// Drop any newly-added doc_ids the caller can't read; preserve
// already-attached docs so a non-owner collaborator's PATCH
// doesn't accidentally orphan cells they can't directly access.
const existingDocIdSet = new Set(existingDocIds);
const newDocCandidates = requestedDocIds.filter(
(id) => !existingDocIdSet.has(id),
);
const newDocAllowed = await filterAccessibleDocumentIds(
newDocCandidates,
userId,
userEmail,
db,
);
const newDocAllowedSet = new Set(newDocAllowed);
const newDocIds = requestedDocIds.filter(
(id) => existingDocIdSet.has(id) || newDocAllowedSet.has(id),
);
const removedDocIds = existingDocIds.filter(
(id) => !newDocIds.includes(id),
);
@ -657,6 +687,17 @@ tabularRouter.post(
if (!column)
return void res.status(400).json({ detail: "Column not found" });
// Defense-in-depth — refuse to extract bytes for a document the
// caller can't read, even if a stale tabular_cells row points at it
// from before the access filter was added (CWE-639).
const docAllowed = await filterAccessibleDocumentIds(
[document_id],
userId,
userEmail,
db,
);
if (docAllowed.length === 0)
return void res.status(404).json({ detail: "Document not found" });
const { data: doc } = await db
.from("documents")
.select("id, filename, file_type")
@ -763,12 +804,21 @@ tabularRouter.post("/:reviewId/generate", requireAuth, async (req, res) => {
cellMap.set(`${cell.document_id}:${cell.column_index}`, cell);
const docIds = [...new Set((cells ?? []).map((c) => c.document_id))];
// Same defense-in-depth as /regenerate-cell — filter to docs the caller
// can actually read, so legacy cells planted before the access check
// can't be coerced into running an LLM extraction (CWE-639).
const allowedDocIds = new Set(
await filterAccessibleDocumentIds(docIds, userId, userEmail, db),
);
let docs: Record<string, unknown>[] = [];
if (docIds.length > 0) {
const { data } = await db
.from("documents")
.select("id, filename, file_type, page_count")
.in("id", docIds);
const filteredIds = docIds.filter((id) => allowedDocIds.has(id));
const { data } = filteredIds.length > 0
? await db
.from("documents")
.select("id, filename, file_type, page_count")
.in("id", filteredIds)
: { data: [] as Record<string, unknown>[] };
docs = data ?? [];
} else if (review.project_id) {
const { data } = await db