mirror of
https://github.com/willchen96/mike.git
synced 2026-06-08 20:25:13 +02:00
fix(security): scope tabular-review document_ids by access (CWE-639)
The tabular-review routes accept user-supplied document_ids in
request bodies (POST /tabular-review, PATCH /:reviewId) and stale
cell rows on byte-fetching paths (POST /:reviewId/regenerate-cell,
POST /:reviewId/generate). None of those paths checked whether the
caller can read those documents — a free-account attacker could plant
foreign UUIDs into their own review and have the server fetch the
bytes from R2 + run an LLM extraction over them, returning verbatim
text via the standard review GET.
Adds filterAccessibleDocumentIds(documentIds, userId, userEmail, db)
next to the existing access helpers (owner-of-doc OR project member),
and applies it at the four entry points:
- POST /tabular-review drop unauthorised on insert
- PATCH /:reviewId drop newly-added unauthorised; keep
already-attached cells so non-owner
collaborators don't accidentally
orphan rows they can't directly
access
- POST /:reviewId/regenerate-cell refuse byte fetch when caller has
no access to the underlying doc
- POST /:reviewId/generate filter docIds before parallel LLM
fetch (defense-in-depth for legacy
cells planted before this fix)
Fails closed silently rather than 403'ing so legacy clients that pass
stale ids don't error out the whole review.
Detected by Aeon + manual review.
Severity: high
CWE-639 (Authorization Bypass Through User-Controlled Key)
This commit is contained in:
parent
f40c25d07f
commit
e261d2e4bd
2 changed files with 97 additions and 6 deletions
|
|
@ -119,6 +119,47 @@ export async function ensureReviewAccess(
|
|||
return { ok: false };
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter a list of document IDs down to those the caller is actually
|
||||
* authorised to read — owners pass, plus any document whose `project_id`
|
||||
* the caller has access to (own project or `shared_with` member).
|
||||
*
|
||||
* The tabular-review routes accept user-supplied `document_ids` from
|
||||
* request bodies; without this filter an attacker who has any review of
|
||||
* their own can plant arbitrary doc UUIDs and have the server fetch + run
|
||||
* an LLM extraction over their bytes (CWE-639).
|
||||
*/
|
||||
export async function filterAccessibleDocumentIds(
|
||||
documentIds: string[],
|
||||
userId: string,
|
||||
userEmail: string | null | undefined,
|
||||
db: Db,
|
||||
): Promise<string[]> {
|
||||
if (documentIds.length === 0) return [];
|
||||
const { data: docs } = await db
|
||||
.from("documents")
|
||||
.select("id, user_id, project_id")
|
||||
.in("id", documentIds);
|
||||
const rows = (docs ?? []) as {
|
||||
id: string;
|
||||
user_id: string;
|
||||
project_id: string | null;
|
||||
}[];
|
||||
if (rows.length === 0) return [];
|
||||
const accessibleProjectIds = new Set(
|
||||
await listAccessibleProjectIds(userId, userEmail, db),
|
||||
);
|
||||
const out: string[] = [];
|
||||
for (const d of rows) {
|
||||
if (d.user_id === userId) {
|
||||
out.push(d.id);
|
||||
} else if (d.project_id && accessibleProjectIds.has(d.project_id)) {
|
||||
out.push(d.id);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of project IDs the user can access — own projects plus
|
||||
* any project where their email is in `shared_with`. Used to scope chat
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import { getUserApiKeys, getUserModelSettings } from "../lib/userSettings";
|
|||
import {
|
||||
checkProjectAccess,
|
||||
ensureReviewAccess,
|
||||
filterAccessibleDocumentIds,
|
||||
listAccessibleProjectIds,
|
||||
} from "../lib/access";
|
||||
|
||||
|
|
@ -192,6 +193,18 @@ tabularRouter.post("/", requireAuth, async (req, res) => {
|
|||
if (!access.ok)
|
||||
return void res.status(404).json({ detail: "Project not found" });
|
||||
}
|
||||
// Drop any document_ids the caller can't access. Without this filter a
|
||||
// user can stuff foreign UUIDs into document_ids, then call /generate
|
||||
// or /regenerate-cell to read those documents' bytes back through the
|
||||
// LLM (CWE-639).
|
||||
const allowedDocumentIds = Array.isArray(document_ids)
|
||||
? await filterAccessibleDocumentIds(
|
||||
document_ids,
|
||||
userId,
|
||||
userEmail,
|
||||
db,
|
||||
)
|
||||
: [];
|
||||
const { data: review, error } = await db
|
||||
.from("tabular_reviews")
|
||||
.insert({
|
||||
|
|
@ -208,7 +221,7 @@ tabularRouter.post("/", requireAuth, async (req, res) => {
|
|||
.status(500)
|
||||
.json({ detail: error?.message ?? "Failed to create review" });
|
||||
|
||||
const cells = document_ids.flatMap((docId) =>
|
||||
const cells = allowedDocumentIds.flatMap((docId) =>
|
||||
columns_config.map((col) => ({
|
||||
review_id: review.id,
|
||||
document_id: docId,
|
||||
|
|
@ -498,10 +511,27 @@ tabularRouter.patch("/:reviewId", requireAuth, async (req, res) => {
|
|||
|
||||
if (Array.isArray(req.body.document_ids)) {
|
||||
// document_ids is the new source of truth — delete removed docs' cells
|
||||
const newDocIds = req.body.document_ids as string[];
|
||||
const requestedDocIds = req.body.document_ids as string[];
|
||||
const existingDocIds = (existingCells ?? []).map(
|
||||
(cell) => cell.document_id,
|
||||
);
|
||||
// Drop any newly-added doc_ids the caller can't read; preserve
|
||||
// already-attached docs so a non-owner collaborator's PATCH
|
||||
// doesn't accidentally orphan cells they can't directly access.
|
||||
const existingDocIdSet = new Set(existingDocIds);
|
||||
const newDocCandidates = requestedDocIds.filter(
|
||||
(id) => !existingDocIdSet.has(id),
|
||||
);
|
||||
const newDocAllowed = await filterAccessibleDocumentIds(
|
||||
newDocCandidates,
|
||||
userId,
|
||||
userEmail,
|
||||
db,
|
||||
);
|
||||
const newDocAllowedSet = new Set(newDocAllowed);
|
||||
const newDocIds = requestedDocIds.filter(
|
||||
(id) => existingDocIdSet.has(id) || newDocAllowedSet.has(id),
|
||||
);
|
||||
const removedDocIds = existingDocIds.filter(
|
||||
(id) => !newDocIds.includes(id),
|
||||
);
|
||||
|
|
@ -657,6 +687,17 @@ tabularRouter.post(
|
|||
if (!column)
|
||||
return void res.status(400).json({ detail: "Column not found" });
|
||||
|
||||
// Defense-in-depth — refuse to extract bytes for a document the
|
||||
// caller can't read, even if a stale tabular_cells row points at it
|
||||
// from before the access filter was added (CWE-639).
|
||||
const docAllowed = await filterAccessibleDocumentIds(
|
||||
[document_id],
|
||||
userId,
|
||||
userEmail,
|
||||
db,
|
||||
);
|
||||
if (docAllowed.length === 0)
|
||||
return void res.status(404).json({ detail: "Document not found" });
|
||||
const { data: doc } = await db
|
||||
.from("documents")
|
||||
.select("id, filename, file_type")
|
||||
|
|
@ -763,12 +804,21 @@ tabularRouter.post("/:reviewId/generate", requireAuth, async (req, res) => {
|
|||
cellMap.set(`${cell.document_id}:${cell.column_index}`, cell);
|
||||
|
||||
const docIds = [...new Set((cells ?? []).map((c) => c.document_id))];
|
||||
// Same defense-in-depth as /regenerate-cell — filter to docs the caller
|
||||
// can actually read, so legacy cells planted before the access check
|
||||
// can't be coerced into running an LLM extraction (CWE-639).
|
||||
const allowedDocIds = new Set(
|
||||
await filterAccessibleDocumentIds(docIds, userId, userEmail, db),
|
||||
);
|
||||
let docs: Record<string, unknown>[] = [];
|
||||
if (docIds.length > 0) {
|
||||
const { data } = await db
|
||||
.from("documents")
|
||||
.select("id, filename, file_type, page_count")
|
||||
.in("id", docIds);
|
||||
const filteredIds = docIds.filter((id) => allowedDocIds.has(id));
|
||||
const { data } = filteredIds.length > 0
|
||||
? await db
|
||||
.from("documents")
|
||||
.select("id, filename, file_type, page_count")
|
||||
.in("id", filteredIds)
|
||||
: { data: [] as Record<string, unknown>[] };
|
||||
docs = data ?? [];
|
||||
} else if (review.project_id) {
|
||||
const { data } = await db
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue