// mike/backend/src/routes/projects.ts — 905 lines, 31 KiB, TypeScript

import { Router } from "express";
import { requireAuth } from "../middleware/auth";
import { createServerSupabase } from "../lib/supabase";
import { createClient } from "@supabase/supabase-js";
import {
attachActiveVersionPaths,
attachLatestVersionNumbers,
} from "../lib/documentVersions";
import { downloadFile, uploadFile, storageKey } from "../lib/storage";
import { docxToPdf, convertedPdfKey } from "../lib/convert";
import { checkProjectAccess } from "../lib/access";
import { singleFileUpload } from "../lib/upload";
// Router that carries every project route registered in this file.
export const projectsRouter = Router();
// File extensions accepted by `handleDocumentUpload`; the uploaded file's
// extension is lower-cased before it is checked against this set.
const ALLOWED_TYPES = new Set(["pdf", "docx", "doc"]);
/**
 * Turns a user-supplied rename value into the filename to store.
 *
 * The input is trimmed and cut to 200 characters. If what remains already
 * ends in a dot followed by 1–6 alphanumerics it is returned as-is;
 * otherwise the extension of `currentName` (when it has one) is appended.
 *
 * @param nextName - Requested name; anything that is not a string is rejected.
 * @param currentName - Existing filename used as the extension source.
 * @returns The filename to persist, or `null` when the request is unusable.
 */
function normalizeDocumentFilename(nextName: unknown, currentName: string) {
  if (typeof nextName !== "string") return null;

  const candidate = nextName.trim().slice(0, 200);
  if (candidate.length === 0) return null;

  const extensionPattern = /\.[a-z0-9]{1,6}$/i;
  if (extensionPattern.test(candidate)) return candidate;

  // No extension supplied: inherit the one the document already has, if any.
  const inherited = extensionPattern.exec(currentName);
  return inherited ? candidate + inherited[0] : candidate;
}
// GET /projects
// Lists every project the caller owns plus every project whose `shared_with`
// array contains the caller's email, newest first. Each entry carries an
// `is_owner` flag and document / chat / review counts.
projectsRouter.get("/", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string;
  const db = createServerSupabase();

  const owned = await db
    .from("projects")
    .select("*")
    .eq("user_id", userId)
    .order("created_at", { ascending: false });
  if (owned.error)
    return void res.status(500).json({ detail: owned.error.message });

  // Without an email on the session nothing can match `shared_with`, so the
  // second query is skipped and an empty result is substituted.
  const shared = userEmail
    ? await db
        .from("projects")
        .select("*")
        .filter("shared_with", "cs", JSON.stringify([userEmail]))
        .neq("user_id", userId)
        .order("created_at", { ascending: false })
    : { data: [], error: null };
  if (shared.error)
    return void res.status(500).json({ detail: shared.error.message });

  // Each list is already ordered, but the merged list needs its own sort.
  const merged = [...(owned.data ?? []), ...(shared.data ?? [])];
  merged.sort(
    (left, right) =>
      new Date(right.created_at).getTime() -
      new Date(left.created_at).getTime(),
  );

  // Only the `.count` of each of these queries is read below.
  const countOnly = { count: "exact", head: true } as const;
  const payload = await Promise.all(
    merged.map(async (project) => {
      const [documents, chats, reviews] = await Promise.all([
        db
          .from("documents")
          .select("id", countOnly)
          .eq("project_id", project.id),
        db.from("chats").select("id", countOnly).eq("project_id", project.id),
        db
          .from("tabular_reviews")
          .select("id", countOnly)
          .eq("project_id", project.id),
      ]);
      return {
        ...project,
        is_owner: project.user_id === userId,
        document_count: documents.count ?? 0,
        chat_count: chats.count ?? 0,
        review_count: reviews.count ?? 0,
      };
    }),
  );
  res.json(payload);
});
// POST /projects
// Creates a project owned by the caller.
//
// Body: `name` (required, non-empty string), `cm_number` (optional),
// `shared_with` (optional array of emails). Shared emails are trimmed,
// lower-cased and de-duplicated; non-string entries are dropped. Sharing with
// the caller's own email is rejected with 400.
// Responds 201 with the inserted row plus an empty `documents` array.
projectsRouter.post("/", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string | undefined;
  // The body is untrusted JSON: tolerate a missing body and do not assume
  // `name` is a string, otherwise `.trim()` would throw inside this async
  // handler instead of producing a 400.
  const { name, cm_number, shared_with } = (req.body ?? {}) as {
    name?: unknown;
    cm_number?: string;
    shared_with?: unknown;
  };
  if (typeof name !== "string" || !name.trim())
    return void res.status(400).json({ detail: "name is required" });

  const normalizedUserEmail = userEmail?.trim().toLowerCase();
  const cleanedSharedWith: string[] = [];
  const seenSharedEmails = new Set<string>();
  if (Array.isArray(shared_with)) {
    for (const raw of shared_with) {
      if (typeof raw !== "string") continue;
      const e = raw.trim().toLowerCase();
      if (!e || seenSharedEmails.has(e)) continue;
      if (normalizedUserEmail && e === normalizedUserEmail) {
        return void res
          .status(400)
          .json({ detail: "You cannot share a project with yourself." });
      }
      seenSharedEmails.add(e);
      cleanedSharedWith.push(e);
    }
  }

  const db = createServerSupabase();
  const { data, error } = await db
    .from("projects")
    .insert({
      user_id: userId,
      name: name.trim(),
      cm_number: cm_number ?? null,
      shared_with: cleanedSharedWith,
    })
    .select("*")
    .single();
  if (error) return void res.status(500).json({ detail: error.message });
  res.status(201).json({ ...data, documents: [] });
});
// GET /projects/:projectId
// Returns one project with its documents and folders (both oldest first).
// Visible to the owner and to anyone whose email appears in `shared_with`;
// everyone else gets 404 so the project's existence is not revealed.
projectsRouter.get("/:projectId", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string | undefined;
  const { projectId } = req.params;
  const db = createServerSupabase();
  const { data: project, error } = await db
    .from("projects")
    .select("*")
    .eq("id", projectId)
    .single();
  if (error || !project)
    return void res.status(404).json({ detail: "Project not found" });

  // The create/update routes store `shared_with` emails trimmed and
  // lower-cased, and the people route compares lower-cased too, so the
  // membership test here is case-insensitive rather than an exact match.
  const normalizedEmail = userEmail?.trim().toLowerCase();
  const sharedWith: unknown[] = Array.isArray(project.shared_with)
    ? project.shared_with
    : [];
  const isShared =
    !!normalizedEmail &&
    sharedWith.some(
      (entry) =>
        typeof entry === "string" &&
        entry.trim().toLowerCase() === normalizedEmail,
    );
  const canAccess = project.user_id === userId || isShared;
  if (!canAccess)
    return void res.status(404).json({ detail: "Project not found" });

  const [{ data: docs }, { data: folderData }] = await Promise.all([
    db
      .from("documents")
      .select("*")
      .eq("project_id", projectId)
      .order("created_at", { ascending: true }),
    db
      .from("project_subfolders")
      .select("*")
      .eq("project_id", projectId)
      .order("created_at", { ascending: true }),
  ]);
  const docsTyped = (docs ?? []) as unknown as {
    id: string;
    current_version_id?: string | null;
  }[];
  // Both helpers receive the same array; it is returned as `documents` below.
  await attachLatestVersionNumbers(db, docsTyped);
  await attachActiveVersionPaths(db, docsTyped);
  res.json({
    ...project,
    is_owner: project.user_id === userId,
    documents: docsTyped,
    folders: folderData ?? [],
  });
});
// GET /projects/:projectId/people
// Resolves the project owner and every shared member to an email plus an
// optional display name. Used by the People modal so the UI can show display
// names where available and tag the current user as "You".
// Responds 404 unless the caller is the owner or one of the shared emails.
projectsRouter.get("/:projectId/people", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string | undefined;
  const db = createServerSupabase();

  const { data: project } = await db
    .from("projects")
    .select("id, user_id, shared_with")
    .eq("id", req.params.projectId)
    .single();
  if (!project)
    return void res.status(404).json({ detail: "Project not found" });

  const ownerId = project.user_id as string;
  const rawShared: string[] = Array.isArray(project.shared_with)
    ? (project.shared_with as string[])
    : [];
  const sharedWith = rawShared.map((address) => address.toLowerCase());
  const callerIsMember =
    project.user_id === userId ||
    (!!userEmail && sharedWith.includes(userEmail.toLowerCase()));
  if (!callerIsMember)
    return void res.status(404).json({ detail: "Project not found" });

  // Pull every auth user (matching the lookup endpoint's pattern). For
  // larger deployments this should page or be replaced with a bulk-by-id
  // RPC, but it keeps things simple while user counts are modest.
  const { data: usersData } = await db.auth.admin.listUsers({ perPage: 1000 });
  const accountsByEmail = new Map<string, { id: string; email: string }>();
  const accountsById = new Map<string, { id: string; email: string }>();
  for (const { id, email } of usersData?.users ?? []) {
    if (!email) continue;
    accountsByEmail.set(email.toLowerCase(), { id, email });
    accountsById.set(id, { id, email });
  }

  // Owner first, then every member that maps to a known account; the Set
  // removes repeats while keeping first-seen order.
  const memberIds = sharedWith.flatMap((email) => {
    const account = accountsByEmail.get(email);
    return account ? [account.id] : [];
  });
  const profileIds = Array.from(new Set<string>([ownerId, ...memberIds]));

  const profiles = new Map<
    string,
    { display_name: string | null; organisation: string | null }
  >();
  if (profileIds.length > 0) {
    const { data: rows } = await db
      .from("user_profiles")
      .select("user_id, display_name, organisation")
      .in("user_id", profileIds);
    for (const row of rows ?? []) {
      profiles.set(row.user_id as string, {
        display_name: (row.display_name as string | null) ?? null,
        organisation: (row.organisation as string | null) ?? null,
      });
    }
  }

  const owner = {
    user_id: project.user_id,
    email: accountsById.get(ownerId)?.email ?? null,
    display_name: profiles.get(ownerId)?.display_name ?? null,
  };
  const members = sharedWith.map((email) => {
    const account = accountsByEmail.get(email);
    if (!account) return { email, display_name: null };
    return {
      email,
      display_name: profiles.get(account.id)?.display_name ?? null,
    };
  });
  res.json({ owner, members });
});
// PATCH /projects/:projectId
// Owner-only update of `name`, `cm_number` and/or `shared_with`. The update is
// filtered by both project id and the caller's user id, so a non-owner (or an
// unknown id) matches no row and receives 404.
// Responds with the updated project plus its documents and folders, in the
// same shape GET /projects/:projectId returns.
projectsRouter.patch("/:projectId", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string | undefined;
  const { projectId } = req.params;
  // The body is untrusted JSON; tolerate it being absent.
  const body = (req.body ?? {}) as Record<string, unknown>;
  const updates: Record<string, unknown> = {};
  if (body.name != null) {
    // Hold renames to the same rule as POST /projects: a non-empty string,
    // stored trimmed.
    if (typeof body.name !== "string" || !body.name.trim())
      return void res.status(400).json({ detail: "name is required" });
    updates.name = body.name.trim();
  }
  if (body.cm_number != null) updates.cm_number = body.cm_number;
  if (Array.isArray(body.shared_with)) {
    // Normalise: lowercase + dedupe + drop empties.
    const normalizedUserEmail = userEmail?.trim().toLowerCase();
    const seen = new Set<string>();
    const cleaned: string[] = [];
    for (const raw of body.shared_with) {
      if (typeof raw !== "string") continue;
      const e = raw.trim().toLowerCase();
      if (!e || seen.has(e)) continue;
      if (normalizedUserEmail && e === normalizedUserEmail) {
        return void res
          .status(400)
          .json({ detail: "You cannot share a project with yourself." });
      }
      seen.add(e);
      cleaned.push(e);
    }
    updates.shared_with = cleaned;
  }

  const db = createServerSupabase();
  const { data, error } = await db
    .from("projects")
    .update({ ...updates, updated_at: new Date().toISOString() })
    .eq("id", projectId)
    .eq("user_id", userId)
    .select("*")
    .single();
  if (error || !data)
    return void res.status(404).json({ detail: "Project not found" });

  const [{ data: docs }, { data: folderData }] = await Promise.all([
    db
      .from("documents")
      .select("*")
      .eq("project_id", projectId)
      .order("created_at", { ascending: true }),
    db
      .from("project_subfolders")
      .select("*")
      .eq("project_id", projectId)
      .order("created_at", { ascending: true }),
  ]);
  const docsTyped = (docs ?? []) as unknown as {
    id: string;
    current_version_id?: string | null;
  }[];
  // Decorate the documents with the same two helpers the GET route uses so
  // both responses carry the same per-document fields.
  await attachLatestVersionNumbers(db, docsTyped);
  await attachActiveVersionPaths(db, docsTyped);
  res.json({
    ...data,
    is_owner: data.user_id === userId,
    documents: docsTyped,
    folders: folderData ?? [],
  });
});
// DELETE /projects/:projectId
// Deletes the project row matching both the id and the caller's user id.
// Responds 204 whenever the delete statement itself succeeds, including when
// it matched no row; 500 carries the database error message.
projectsRouter.delete("/:projectId", requireAuth, async (req, res) => {
  const ownerId = res.locals.userId as string;
  const db = createServerSupabase();

  const deletion = await db
    .from("projects")
    .delete()
    .eq("id", req.params.projectId)
    .eq("user_id", ownerId);
  if (deletion.error)
    return void res.status(500).json({ detail: deletion.error.message });

  res.status(204).send();
});
// GET /projects/:projectId/documents
// Lists the project's documents, oldest first, after `checkProjectAccess`
// approves the caller (404 otherwise). The rows are passed through
// `attachActiveVersionPaths` before being returned.
projectsRouter.get("/:projectId/documents", requireAuth, async (req, res) => {
  const { userId, userEmail } = res.locals as {
    userId: string;
    userEmail?: string;
  };
  const projectId = req.params.projectId;
  const db = createServerSupabase();

  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  const listing = await db
    .from("documents")
    .select("*")
    .eq("project_id", projectId)
    .order("created_at", { ascending: true });
  // A failed query yields no data and is reported as an empty list.
  const documents = (listing.data ?? []) as unknown as {
    id: string;
    current_version_id?: string | null;
  }[];
  await attachActiveVersionPaths(db, documents);
  res.json(documents);
});
// POST /projects/:projectId/documents/:documentId — assign or copy existing doc into project
//
// Outcomes, in order:
//   - document already in this project → returned unchanged (idempotent)
//   - standalone document (project_id === null) → project_id is set, 200
//   - document in another project → a new row is inserted and the current
//     version's storage objects are copied under new keys, 201
projectsRouter.post(
  "/:projectId/documents/:documentId",
  requireAuth,
  async (req, res) => {
    const userId = res.locals.userId as string;
    const userEmail = res.locals.userEmail as string | undefined;
    const { projectId, documentId } = req.params;
    const db = createServerSupabase();
    const access = await checkProjectAccess(projectId, userId, userEmail, db);
    if (!access.ok)
      return void res.status(404).json({ detail: "Project not found" });

    // Adding-by-id pulls a doc into the project — only the doc's owner
    // is allowed to do that, so other people's standalone docs can't be
    // siphoned into a project the requester happens to share.
    const { data: doc } = await db
      .from("documents")
      .select("*")
      .eq("id", documentId)
      .eq("user_id", userId)
      .single();
    if (!doc)
      return void res.status(404).json({ detail: "Document not found" });

    // Already in this project — idempotent
    if (doc.project_id === projectId) return void res.json(doc);

    if (doc.project_id === null) {
      // Standalone → assign project_id
      const { data: updated, error: assignErr } = await db
        .from("documents")
        .update({ project_id: projectId, updated_at: new Date().toISOString() })
        .eq("id", documentId)
        .select("*")
        .single();
      if (assignErr || !updated)
        return void res
          .status(500)
          .json({ detail: "Failed to update document" });
      return void res.json(updated);
    }

    // Belongs to another project → duplicate record AND copy the
    // underlying storage objects so each project's copy is fully
    // independent (edits/version bumps on one don't leak into the
    // other).
    const { data: copy, error: copyErr } = await db
      .from("documents")
      .insert({
        project_id: projectId,
        user_id: userId,
        filename: doc.filename,
        file_type: doc.file_type,
        size_bytes: doc.size_bytes,
        page_count: doc.page_count,
        structure_tree: doc.structure_tree,
        status: doc.status,
      })
      .select("*")
      .single();
    if (copyErr || !copy)
      return void res.status(500).json({ detail: "Failed to copy document" });

    // Set once the new version row is linked to the copy, so the response
    // can report the link that the inserted row does not yet contain.
    let linkedVersionId: string | null = null;
    try {
      if (doc.current_version_id) {
        const { data: srcV } = await db
          .from("document_versions")
          .select(
            "storage_path, pdf_storage_path, version_number, display_name, source",
          )
          .eq("id", doc.current_version_id)
          .single();
        if (srcV?.storage_path) {
          const srcBytes = await downloadFile(srcV.storage_path);
          if (!srcBytes) {
            return void res
              .status(500)
              .json({ detail: "Failed to read source document bytes" });
          }
          const newKey = storageKey(userId, copy.id as string, doc.filename);
          const contentType =
            doc.file_type === "pdf"
              ? "application/pdf"
              : "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
          await uploadFile(newKey, srcBytes, contentType);

          // PDFs share one object for source + display rendition. DOCX
          // store the converted PDF at a separate `converted-pdfs/` key —
          // copy that too if it exists so the copy renders without going
          // back through libreoffice.
          let newPdfPath: string | null = null;
          if (srcV.pdf_storage_path) {
            if (srcV.pdf_storage_path === srcV.storage_path) {
              newPdfPath = newKey;
            } else {
              const pdfBytes = await downloadFile(srcV.pdf_storage_path);
              if (pdfBytes) {
                const newPdfKey = convertedPdfKey(userId, copy.id as string);
                await uploadFile(newPdfKey, pdfBytes, "application/pdf");
                newPdfPath = newPdfKey;
              }
            }
          }

          const { data: newV } = await db
            .from("document_versions")
            .insert({
              document_id: copy.id,
              storage_path: newKey,
              pdf_storage_path: newPdfPath,
              source: (srcV.source as string | null) ?? "upload",
              version_number: srcV.version_number ?? 1,
              display_name: srcV.display_name ?? doc.filename,
            })
            .select("id")
            .single();
          const newVersionId = (newV?.id as string | null) ?? null;
          if (newVersionId) {
            const { error: linkErr } = await db
              .from("documents")
              .update({ current_version_id: newVersionId })
              .eq("id", copy.id);
            if (!linkErr) linkedVersionId = newVersionId;
          }
        }
      }
    } catch (err) {
      // A storage call that throws would otherwise escape this async
      // handler with no response sent; report it like the other failures.
      // NOTE(review): the inserted copy row is left in place on failure.
      console.error(
        `[copy] Failed to copy document ${documentId} into project ${projectId}:`,
        err,
      );
      return void res.status(500).json({ detail: "Failed to copy document" });
    }

    // `copy` was read back before any version existed, so its
    // current_version_id is stale once the link above has been written.
    const responseDoc = linkedVersionId
      ? { ...copy, current_version_id: linkedVersionId }
      : copy;
    return void res.status(201).json(responseDoc);
  },
);
// PATCH /projects/:projectId/documents/:documentId — rename a project document
// The requested name goes through `normalizeDocumentFilename`. When the
// document has a current version, that version's `display_name` is updated to
// the new filename as well.
projectsRouter.patch("/:projectId/documents/:documentId", requireAuth, async (req, res) => {
  const { userId, userEmail } = res.locals as {
    userId: string;
    userEmail?: string;
  };
  const { projectId, documentId } = req.params;
  const db = createServerSupabase();

  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  const lookup = await db
    .from("documents")
    .select("id, filename, current_version_id")
    .eq("id", documentId)
    .eq("project_id", projectId)
    .single();
  const current = lookup.data;
  if (!current)
    return void res.status(404).json({ detail: "Document not found" });

  const renamed = normalizeDocumentFilename(
    req.body?.filename,
    current.filename as string,
  );
  if (!renamed)
    return void res.status(400).json({ detail: "filename is required" });

  const renameResult = await db
    .from("documents")
    .update({ filename: renamed, updated_at: new Date().toISOString() })
    .eq("id", documentId)
    .eq("project_id", projectId)
    .select("*")
    .single();
  if (renameResult.error || !renameResult.data)
    return void res.status(404).json({ detail: "Document not found" });

  // The outcome of this second write is not inspected.
  if (current.current_version_id) {
    await db
      .from("document_versions")
      .update({ display_name: renamed })
      .eq("id", current.current_version_id)
      .eq("document_id", documentId);
  }

  res.json(renameResult.data);
});
// POST /projects/:projectId/documents
// Uploads the multipart field "file" into the project. Once access is
// confirmed, the request is handed to `handleDocumentUpload`, which sends the
// response itself.
projectsRouter.post(
  "/:projectId/documents",
  requireAuth,
  singleFileUpload("file"),
  async (req, res) => {
    const { userId, userEmail } = res.locals as {
      userId: string;
      userEmail?: string;
    };
    const projectId = req.params.projectId;
    const db = createServerSupabase();

    const access = await checkProjectAccess(projectId, userId, userEmail, db);
    if (access.ok) {
      await handleDocumentUpload(req, res, userId, projectId, db);
      return;
    }
    res.status(404).json({ detail: "Project not found" });
  },
);
// GET /projects/:projectId/chats — every assistant chat under this project
// (any author with project access), newest first. Used by the project page's
// chat tab so it doesn't have to filter the global GET /chat list — and so
// collaborators see each other's chats inside the project even though those
// don't appear in the global list.
projectsRouter.get("/:projectId/chats", requireAuth, async (req, res) => {
  const { userId, userEmail } = res.locals as {
    userId: string;
    userEmail?: string;
  };
  const projectId = req.params.projectId;
  const db = createServerSupabase();

  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  const chats = await db
    .from("chats")
    .select("*")
    .eq("project_id", projectId)
    .order("created_at", { ascending: false });
  if (chats.error)
    return void res.status(500).json({ detail: chats.error.message });

  res.json(chats.data ?? []);
});
// ── Folder routes ─────────────────────────────────────────────────────────────
// POST /projects/:projectId/folders
// Creates a folder in the project, optionally nested under
// `parent_folder_id`, which must name a folder of the same project.
// Responds 201 with the inserted row.
projectsRouter.post("/:projectId/folders", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string | undefined;
  const { projectId } = req.params;
  // The body is untrusted JSON: tolerate a missing body and do not assume
  // `name` is a string, otherwise `.trim()` would throw inside this async
  // handler instead of producing a 400.
  const { name, parent_folder_id } = (req.body ?? {}) as {
    name?: unknown;
    parent_folder_id?: string | null;
  };
  if (typeof name !== "string" || !name.trim())
    return void res.status(400).json({ detail: "name is required" });

  const db = createServerSupabase();
  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  // Verify parent folder belongs to this project
  if (parent_folder_id) {
    const { data: parent } = await db
      .from("project_subfolders")
      .select("id")
      .eq("id", parent_folder_id)
      .eq("project_id", projectId)
      .single();
    if (!parent)
      return void res.status(404).json({ detail: "Parent folder not found" });
  }

  const { data, error } = await db
    .from("project_subfolders")
    .insert({
      project_id: projectId,
      user_id: userId,
      name: name.trim(),
      parent_folder_id: parent_folder_id ?? null,
    })
    .select("*")
    .single();
  if (error) return void res.status(500).json({ detail: error.message });
  res.status(201).json(data);
});
// PATCH /projects/:projectId/folders/:folderId
// Renames a folder and/or moves it under another parent. A move is rejected
// with 400 when the proposed parent is the folder itself or one of its
// descendants. Sending `parent_folder_id: null` moves the folder to the root.
projectsRouter.patch("/:projectId/folders/:folderId", requireAuth, async (req, res) => {
  const userId = res.locals.userId as string;
  const userEmail = res.locals.userEmail as string | undefined;
  const { projectId, folderId } = req.params;
  // The body is untrusted JSON; tolerate it being absent.
  const body = (req.body ?? {}) as {
    name?: unknown;
    parent_folder_id?: string | null;
  };
  const db = createServerSupabase();
  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  const updates: Record<string, unknown> = {
    updated_at: new Date().toISOString(),
  };
  if (body.name != null) {
    // Same rule as folder creation: a non-empty string, stored trimmed.
    if (typeof body.name !== "string" || !body.name.trim())
      return void res.status(400).json({ detail: "name is required" });
    updates.name = body.name.trim();
  }

  if ("parent_folder_id" in body) {
    // Cycle check: walk up the tree from the proposed parent to ensure
    // folderId is not an ancestor.
    if (body.parent_folder_id) {
      const parent = await loadProjectFolder(
        db,
        projectId,
        body.parent_folder_id,
      );
      if (!parent)
        return void res.status(404).json({ detail: "Parent folder not found" });

      // `node` starts as the parent loaded above so the first step of the
      // walk does not query it a second time.
      let node: { id: string; parent_folder_id: string | null } | null = parent;
      let cur: string | null = body.parent_folder_id;
      // Guards against looping forever if the stored tree already contains
      // a cycle that does not pass through folderId.
      const visited = new Set<string>();
      while (cur) {
        if (cur === folderId)
          return void res
            .status(400)
            .json({ detail: "Cannot move a folder into itself or a descendant" });
        if (visited.has(cur)) break;
        visited.add(cur);
        node = node ?? (await loadProjectFolder(db, projectId, cur));
        if (!node)
          return void res
            .status(404)
            .json({ detail: "Parent folder not found" });
        cur = node.parent_folder_id ?? null;
        node = null;
      }
    }
    updates.parent_folder_id = body.parent_folder_id ?? null;
  }

  const { data, error } = await db
    .from("project_subfolders")
    .update(updates)
    .eq("id", folderId)
    .eq("project_id", projectId)
    .select("*")
    .single();
  if (error || !data)
    return void res.status(404).json({ detail: "Folder not found" });
  res.json(data);
});
// DELETE /projects/:projectId/folders/:folderId
// Removes a folder from the project. Documents filed directly in the folder
// are first detached (folder_id set to null), then the folder row is deleted.
projectsRouter.delete("/:projectId/folders/:folderId", requireAuth, async (req, res) => {
  const { userId, userEmail } = res.locals as {
    userId: string;
    userEmail?: string;
  };
  const { projectId, folderId } = req.params;
  const db = createServerSupabase();

  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  const target = await loadProjectFolder(db, projectId, folderId);
  if (target === null)
    return void res.status(404).json({ detail: "Folder not found" });

  // Move direct documents to root before cascade-deleting subfolders
  await db
    .from("documents")
    .update({ folder_id: null })
    .eq("folder_id", folderId)
    .eq("project_id", projectId);

  const removal = await db
    .from("project_subfolders")
    .delete()
    .eq("id", folderId)
    .eq("project_id", projectId);
  if (removal.error)
    return void res.status(500).json({ detail: removal.error.message });

  res.status(204).send();
});
// PATCH /projects/:projectId/documents/:documentId/folder — move doc to a folder
// A falsy `folder_id` moves the document to the project root; otherwise the
// folder must belong to the same project.
projectsRouter.patch("/:projectId/documents/:documentId/folder", requireAuth, async (req, res) => {
  const { userId, userEmail } = res.locals as {
    userId: string;
    userEmail?: string;
  };
  const { projectId, documentId } = req.params;
  const { folder_id } = req.body as { folder_id: string | null };
  const db = createServerSupabase();

  const access = await checkProjectAccess(projectId, userId, userEmail, db);
  if (!access.ok)
    return void res.status(404).json({ detail: "Project not found" });

  if (folder_id) {
    const destination = await loadProjectFolder(db, projectId, folder_id);
    if (destination === null)
      return void res.status(404).json({ detail: "Folder not found" });
  }

  const move = await db
    .from("documents")
    .update({
      folder_id: folder_id ?? null,
      updated_at: new Date().toISOString(),
    })
    .eq("id", documentId)
    .eq("project_id", projectId)
    .select("*")
    .single();
  if (move.error || !move.data)
    return void res.status(404).json({ detail: "Document not found" });

  res.json(move.data);
});
/**
 * Looks up one folder, scoped to a project.
 *
 * @param db - Supabase client used for the query.
 * @param projectId - Project the folder must belong to.
 * @param folderId - Id of the folder to fetch.
 * @returns The folder's `id` and `parent_folder_id`, or `null` when the query
 *   yields no data. Query errors are not inspected and also surface as `null`.
 */
async function loadProjectFolder(
  db: ReturnType<typeof createServerSupabase>,
  projectId: string,
  folderId: string,
): Promise<{ id: string; parent_folder_id: string | null } | null> {
  type FolderRow = { id: string; parent_folder_id: string | null };

  const lookup = await db
    .from("project_subfolders")
    .select("id, parent_folder_id")
    .eq("id", folderId)
    .eq("project_id", projectId)
    .maybeSingle();

  const row = lookup.data as FolderRow | null;
  return row ?? null;
}
/**
 * Stores an uploaded file as a new document and sends the HTTP response.
 *
 * Steps: validate the extension, insert a `documents` row with status
 * "processing", upload the bytes, extract a structure tree and (for PDFs) a
 * page count, convert DOCX/DOC to PDF for display, insert the first
 * `document_versions` row, then mark the document "ready".
 *
 * @param req - Request whose `file` holds the uploaded file.
 * @param res - Response; this function always sends it (400 / 500 / 201).
 * @param userId - Owner recorded on the document and used in storage keys.
 * @param projectId - Project to attach the document to, or `null`.
 * @param db - Supabase client used for all reads and writes.
 */
export async function handleDocumentUpload(
  req: import("express").Request,
  res: import("express").Response,
  userId: string,
  projectId: string | null,
  db: ReturnType<typeof createServerSupabase>,
) {
  const file = req.file;
  if (!file) return void res.status(400).json({ detail: "file is required" });

  const filename = file.originalname;
  const suffix = filename.includes(".")
    ? filename.split(".").pop()!.toLowerCase()
    : "";
  if (!ALLOWED_TYPES.has(suffix))
    return void res.status(400).json({
      detail: `Unsupported file type: ${suffix}. Allowed: pdf, docx, doc`,
    });

  const content = file.buffer;
  const { data: doc, error: insertErr } = await db
    .from("documents")
    .insert({
      project_id: projectId,
      user_id: userId,
      filename,
      file_type: suffix,
      size_bytes: content.byteLength,
      status: "processing",
    })
    .select("*")
    .single();
  if (insertErr || !doc)
    return void res
      .status(500)
      .json({ detail: "Failed to create document record" });

  try {
    const docId = doc.id as string;
    const key = storageKey(userId, docId, filename);
    const contentType =
      suffix === "pdf"
        ? "application/pdf"
        : "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

    // Standalone copy of exactly the file's bytes (the Buffer may be a view
    // into a larger ArrayBuffer).
    const rawBuf = content.buffer.slice(
      content.byteOffset,
      content.byteOffset + content.byteLength,
    ) as ArrayBuffer;
    await uploadFile(key, rawBuf, contentType);

    // Structure extraction and page counting each load the PDF separately.
    // pdf.js may take ownership of the buffer it is handed, so the first
    // consumer gets its own copy and `rawBuf` stays intact for the second.
    const tree = await extractStructureTree(rawBuf.slice(0), suffix, filename);
    const pageCount = suffix === "pdf" ? await countPdfPages(rawBuf) : null;

    // Convert DOCX/DOC → PDF for display. PDFs are their own rendition.
    let pdfStoragePath: string | null = null;
    if (suffix === "docx" || suffix === "doc") {
      try {
        const pdfBuf = await docxToPdf(content);
        const pdfKey = convertedPdfKey(userId, docId);
        await uploadFile(
          pdfKey,
          pdfBuf.buffer.slice(
            pdfBuf.byteOffset,
            pdfBuf.byteOffset + pdfBuf.byteLength,
          ) as ArrayBuffer,
          "application/pdf",
        );
        pdfStoragePath = pdfKey;
      } catch (err) {
        // Best effort: the upload still succeeds without a PDF rendition.
        console.error(
          `[upload] DOCX→PDF conversion failed for ${filename}:`,
          err,
        );
      }
    } else if (suffix === "pdf") {
      pdfStoragePath = key;
    }

    // Storage paths live on document_versions — create the V1 row and
    // point documents.current_version_id at it.
    const { data: versionRow, error: verErr } = await db
      .from("document_versions")
      .insert({
        document_id: docId,
        storage_path: key,
        pdf_storage_path: pdfStoragePath,
        source: "upload",
        version_number: 1,
        display_name: filename,
      })
      .select("id")
      .single();
    if (verErr || !versionRow) {
      throw new Error(
        `Failed to record upload version: ${verErr?.message ?? "unknown"}`,
      );
    }

    // If this write fails the row would stay "processing" while the client
    // was told 201, so a failure is routed to the error path below.
    const { error: finalizeErr } = await db
      .from("documents")
      .update({
        current_version_id: versionRow.id,
        size_bytes: content.byteLength,
        page_count: pageCount,
        structure_tree: tree ?? null,
        status: "ready",
        updated_at: new Date().toISOString(),
      })
      .eq("id", docId);
    if (finalizeErr) {
      throw new Error(`Failed to finalize document: ${finalizeErr.message}`);
    }

    const { data: updated } = await db
      .from("documents")
      .select("*")
      .eq("id", docId)
      .single();
    // The storage paths are merged into the response from the values
    // computed above.
    const responseDoc = updated
      ? {
          ...updated,
          storage_path: key,
          pdf_storage_path: pdfStoragePath,
        }
      : updated;
    return void res.status(201).json(responseDoc);
  } catch (e) {
    await db.from("documents").update({ status: "error" }).eq("id", doc.id);
    return void res
      .status(500)
      .json({ detail: `Document processing failed: ${String(e)}` });
  }
}
/**
 * Counts the pages of a PDF using pdf.js.
 *
 * @param buf - Raw PDF bytes. The buffer is copied before parsing, so the
 *   caller's buffer is left untouched.
 * @returns The page count, or `null` if pdf.js cannot be loaded or the bytes
 *   cannot be parsed.
 */
async function countPdfPages(buf: ArrayBuffer): Promise<number | null> {
  try {
    const pdfjsLib = await import("pdfjs-dist/legacy/build/pdf.mjs" as string);
    // pdf.js may take ownership of the typed array it receives, which would
    // leave the caller's buffer unusable afterwards; hand it a private copy.
    const pdf = await (
      pdfjsLib as unknown as {
        getDocument: (opts: unknown) => {
          promise: Promise<{ numPages: number }>;
        };
      }
    ).getDocument({ data: new Uint8Array(buf.slice(0)) }).promise;
    return pdf.numPages;
  } catch {
    // Any failure is treated as "page count unknown".
    return null;
  }
}
/**
 * Builds a flat, single-level outline for an uploaded document.
 *
 * PDFs with 5 pages or fewer produce no tree. Longer PDFs use the embedded
 * outline when there is one, otherwise one node per page. Any other file type
 * is read with mammoth and the first 30 non-blank text lines become nodes
 * (titles cut to 100 characters).
 *
 * @param content - Raw file bytes. For PDFs the buffer is copied before
 *   parsing, so the caller's buffer is left untouched.
 * @param fileType - Lower-case extension; "pdf" selects the PDF path.
 * @param filename - Not used by this function.
 * @returns The node list, or `null` when nothing could be extracted or any
 *   step fails.
 */
async function extractStructureTree(
  content: ArrayBuffer,
  fileType: string,
  filename: string,
): Promise<unknown[] | null> {
  try {
    if (fileType === "pdf") {
      const pdfjsLib = await import(
        "pdfjs-dist/legacy/build/pdf.mjs" as string
      );
      // pdf.js may take ownership of the typed array it receives, which would
      // leave the caller's buffer unusable afterwards; hand it a private copy.
      const pdf = await (
        pdfjsLib as unknown as {
          getDocument: (opts: unknown) => {
            promise: Promise<{
              numPages: number;
              getOutline: () => Promise<{ title?: string }[]>;
            }>;
          };
        }
      ).getDocument({ data: new Uint8Array(content.slice(0)) }).promise;
      if (pdf.numPages <= 5) return null;

      const outline = await pdf.getOutline();
      if (outline?.length) {
        return outline.map((item, i) => ({
          id: `h1-${i}`,
          title: item.title ?? `Item ${i + 1}`,
          level: 1,
          page_number: null,
          children: [],
        }));
      }
      // No embedded outline: fall back to one entry per page.
      return Array.from({ length: pdf.numPages }, (_, i) => ({
        id: `page-${i + 1}`,
        title: `Page ${i + 1}`,
        level: 1,
        page_number: i + 1,
        children: [],
      }));
    } else {
      const mammoth = await import("mammoth");
      const result = await mammoth.extractRawText({
        buffer: Buffer.from(content),
      });
      const lines = result.value.split("\n").filter((l) => l.trim());
      const nodes = lines.slice(0, 30).map((line, i) => ({
        id: `h1-${i}`,
        title: line.slice(0, 100),
        level: 1,
        page_number: null,
        children: [],
      }));
      return nodes.length ? nodes : null;
    }
  } catch {
    // Any failure is treated as "no structure available".
    return null;
  }
}