Add local repo contents

This commit is contained in:
willchen96 2026-04-29 19:49:06 +02:00
parent 65739ef1ce
commit d9690965b5
176 changed files with 68998 additions and 0 deletions

146
backend/src/lib/access.ts Normal file
View file

@ -0,0 +1,146 @@
/**
* Project / document access helpers.
*
* Sharing makes the previous "scope by user_id" pattern incorrect — a doc
* can belong to user A's project that A has shared with B's email, and B
* must still be able to read/edit it. These helpers centralize the
* "owner OR shared project member" check so every route uses the same
* logic instead of re-implementing the join.
*
* Returned `isOwner` lets callers gate operations that should stay
* owner-only (delete, rename, member management).
*/
import type { createServerSupabase } from "./supabase";
/** Supabase client type: whatever `createServerSupabase` (from ./supabase) returns. */
type Db = ReturnType<typeof createServerSupabase>;
/**
 * Result of a project access check.
 *
 * The `ok: true` variant carries the loaded project row and `isOwner`;
 * the `ok: false` variant carries no further detail.
 */
export type ProjectAccess =
| {
ok: true;
isOwner: boolean;
project: {
id: string;
user_id: string;
shared_with: string[] | null;
};
}
| { ok: false };
/**
 * Decide whether a user may access a project.
 *
 * Loads the project row by id and grants access when the caller owns it
 * (`user_id` match) or when the caller's email appears in `shared_with`
 * (compared case-insensitively).
 *
 * @param projectId - Id of the project to look up.
 * @param userId - Id of the requesting user.
 * @param userEmail - Email of the requesting user; an empty or nullish
 *   email never matches a shared entry.
 * @param db - Supabase client used for the lookup.
 * @returns `{ ok: true, isOwner, project }` on success, or `{ ok: false }`
 *   when no row came back or the user is neither owner nor shared member.
 */
export async function checkProjectAccess(
  projectId: string,
  userId: string,
  userEmail: string | null | undefined,
  db: Db,
): Promise<ProjectAccess> {
  const lookup = await db
    .from("projects")
    .select("id, user_id, shared_with")
    .eq("id", projectId)
    .single();
  const row = lookup.data as {
    id: string;
    user_id: string;
    shared_with: string[] | null;
  } | null;
  // No row in `data` is treated as "no access".
  if (!row) return { ok: false };

  if (row.user_id === userId) {
    return { ok: true, isOwner: true, project: row };
  }

  const wanted = (userEmail ?? "").toLowerCase();
  if (!wanted) return { ok: false };

  const members = Array.isArray(row.shared_with) ? row.shared_with : [];
  for (const entry of members) {
    if ((entry ?? "").toLowerCase() === wanted) {
      return { ok: true, isOwner: false, project: row };
    }
  }
  return { ok: false };
}
/**
 * Access check for a document row the caller has already fetched, so no
 * document query is issued here.
 *
 * The document's own `user_id` is tested first. Failing that, a document
 * attached to a project defers to `checkProjectAccess`; a document without
 * a project is denied.
 *
 * @returns `{ ok: true, isOwner }` where `isOwner` is true only when the
 *   caller is the document's `user_id`, or `{ ok: false }`.
 */
export async function ensureDocAccess(
  doc: { user_id: string; project_id: string | null },
  userId: string,
  userEmail: string | null | undefined,
  db: Db,
): Promise<{ ok: true; isOwner: boolean } | { ok: false }> {
  const ownsDoc = doc.user_id === userId;
  if (ownsDoc) return { ok: true, isOwner: true };

  const projectId = doc.project_id;
  if (!projectId) return { ok: false };

  // Any successful project check (owner or shared member) grants
  // non-owner access to the document.
  const viaProject = await checkProjectAccess(projectId, userId, userEmail, db);
  return viaProject.ok ? { ok: true, isOwner: false } : { ok: false };
}
/**
 * Access check for an already-loaded tabular review, mirroring
 * `ensureDocAccess`. Checks run in this order:
 *   1. Owner — `review.user_id` equals the caller.
 *   2. Direct share — the caller's email is in the review's own
 *      `shared_with` list (case-insensitive), which also covers reviews
 *      with no project.
 *   3. Project share — when `project_id` is set, anyone who passes
 *      `checkProjectAccess` for that project.
 *
 * @returns `{ ok: true, isOwner }` (`isOwner` true only in case 1), or
 *   `{ ok: false }`.
 */
export async function ensureReviewAccess(
  review: {
    user_id: string;
    project_id: string | null;
    shared_with?: string[] | null;
  },
  userId: string,
  userEmail: string | null | undefined,
  db: Db,
): Promise<{ ok: true; isOwner: boolean } | { ok: false }> {
  if (review.user_id === userId) return { ok: true, isOwner: true };

  const needle = (userEmail ?? "").toLowerCase();
  const directList = Array.isArray(review.shared_with) ? review.shared_with : [];
  const sharedDirectly =
    needle !== "" &&
    directList.some((entry) => (entry ?? "").toLowerCase() === needle);
  if (sharedDirectly) return { ok: true, isOwner: false };

  const projectId = review.project_id;
  if (!projectId) return { ok: false };

  const viaProject = await checkProjectAccess(projectId, userId, userEmail, db);
  return viaProject.ok ? { ok: true, isOwner: false } : { ok: false };
}
/**
 * Collect the ids of every project the user can reach: projects whose
 * `user_id` is the caller, plus projects owned by someone else whose
 * `shared_with` contains the caller's email. The two queries run in
 * parallel; the shared query is skipped when no email is supplied.
 *
 * NOTE(review): the `contains` filter is given `userEmail` as-is, whereas
 * `checkProjectAccess` compares emails case-insensitively — confirm that
 * stored and supplied emails are normalised, otherwise the two can disagree.
 *
 * @returns De-duplicated project ids, own projects first.
 */
export async function listAccessibleProjectIds(
  userId: string,
  userEmail: string | null | undefined,
  db: Db,
): Promise<string[]> {
  const ownQuery = db.from("projects").select("id").eq("user_id", userId);
  const sharedQuery = userEmail
    ? db
        .from("projects")
        .select("id")
        .contains("shared_with", [userEmail])
        .neq("user_id", userId)
    : Promise.resolve({ data: [] as { id: string }[] });

  const [ownResult, sharedResult] = await Promise.all([ownQuery, sharedQuery]);
  const ownRows = (ownResult.data ?? []) as { id: string }[];
  const sharedRows = (sharedResult.data ?? []) as { id: string }[];

  const unique = new Set<string>([...ownRows, ...sharedRows].map((row) => row.id));
  return [...unique];
}

View file

@ -0,0 +1,76 @@
/**
 * Built-in workflow definitions. Each entry pairs an `id` and display
 * `title` with a Markdown prompt (`prompt_md`) assembled from string
 * literals.
 *
 * NOTE(review): the CP-checklist prompt requires `generate_docx`, the
 * credit-agreement prompt forbids it unless the user asks, and the
 * shareholder-agreement prompt asks for a Word document — confirm the
 * difference between the two summary workflows is intended.
 */
export const BUILTIN_WORKFLOWS: { id: string; title: string; prompt_md: string }[] = [
{
id: "builtin-cp-checklist",
title: "Generate CP Checklist",
prompt_md:
"## Generate Conditions Precedent Checklist\n\n" +
"Review the uploaded credit agreement or financing document and generate a comprehensive " +
"Conditions Precedent (CP) checklist.\n\n" +
"You MUST use the generate_docx tool to produce the checklist as a downloadable Word document. " +
"You MUST pass landscape: true to the generate_docx tool — the document must be in landscape orientation. " +
"Do not display the checklist inline — generate the .docx file and provide the download link.\n\n" +
"Structure the document as follows:\n" +
"- For each category of conditions (e.g. Corporate, Financial, Legal, Security), add a section with a heading\n" +
"- Under each category heading, include a table with exactly these four columns in this order:\n" +
" 1. Index — sequential number within the category (1, 2, 3…)\n" +
" 2. Clause Number — the clause or schedule reference from the agreement\n" +
" 3. Clause — a concise description of the condition precedent\n" +
" 4. Status — leave blank (empty string) for the user to fill in\n\n" +
"Use the table field in the section object (not content) for each category's rows.\n\n" +
"Before finalizing, double-check that every table is formatted correctly: each table must have exactly the four columns above in the same order, headers must match exactly (Index, Clause Number, Clause, Status), every row must have the same number of cells as the headers, the Index column must be sequential starting from 1 within each category, and no cells should contain stray markdown, newlines, or placeholder text (use an empty string for Status).",
},
{
id: "builtin-credit-summary",
title: "Credit Agreement Summary",
prompt_md:
"## Credit Agreement Summary\n\n" +
"Review the uploaded credit agreement and produce a comprehensive legal summary covering the following topics. " +
"For each section, identify the key provisions, quote the relevant clause or schedule references, and flag any unusual, onerous, or non-market terms.\n\n" +
"1. **Lenders** — All lenders or members of the lender syndicate, including their full legal name and role (e.g. mandated lead arranger, original lender, agent bank)\n" +
"2. **Borrowers** — All borrowers, including their full legal name and jurisdiction of incorporation\n" +
"3. **Guarantors** — All guarantors, including their full legal name and the scope of their guarantee obligation\n" +
"4. **Other Parties** — Any other material parties (e.g. facility agent, security agent, hedge counterparties, issuing bank) and their roles\n" +
"5. **Date of Agreement** — Date of the credit agreement\n" +
"6. **Facilities** — Each facility available (e.g. Revolving Credit Facility, Term Loan A, Term Loan B, Term Loan C), the facility type, tranche name, and any key structural features\n" +
"7. **Amount** — Total committed amount across all facilities, the currency, and breakdown by tranche if applicable\n" +
"8. **Purpose** — Stated purpose for which borrowings may be used and any restrictions on use of proceeds\n" +
"9. **Interest** — Applicable reference rate (e.g. SOFR, EURIBOR, base rate), the margin, any margin ratchet mechanism, and how interest periods are structured\n" +
"10. **Commitment Fee** — Commitment or utilisation fees, the applicable rate, how they are calculated, and the basis (e.g. undrawn commitment, average utilisation)\n" +
"11. **Repayment Schedule** — Repayment profile for each facility, whether by scheduled instalments or bullet repayment, and the repayment dates and amounts\n" +
"12. **Maturity** — Final maturity date for each facility\n" +
"13. **Security** — Each class of security granted or required (e.g. share pledges, fixed and floating charges, real estate mortgages, account pledges) and the assets or entities over which security is taken\n" +
"14. **Guarantees** — Guarantee obligations, the guarantors, the scope of the guarantee, and any limitations (e.g. up-stream guarantee limitations, guarantor coverage test)\n" +
"15. **Financial Covenants** — Each financial covenant, the metric (e.g. leverage ratio, interest cover, cashflow cover), the applicable test, testing frequency, and any equity cure rights\n" +
"16. **Events of Default** — Each event of default, noting any grace periods, materiality thresholds, or cross-default provisions\n" +
"17. **Assignment** — Restrictions or permissions on assignment or transfer (e.g. white/blacklists, borrower consent for lender transfers; restrictions on borrower assignment)\n" +
"18. **Change of Control** — What constitutes a change of control, what obligations it triggers (e.g. mandatory prepayment, cancellation, lender consent), and any cure period\n" +
"19. **Prepayment Fee** — Any prepayment fees, make-whole premiums, or soft-call protections, the applicable fee, the period during which it applies, and any exceptions (e.g. prepayment from insurance proceeds or asset disposals)\n" +
"20. **Governing Law** — Governing law of the agreement\n" +
"21. **Dispute Resolution** — Whether disputes go to litigation or arbitration, the chosen forum or seat, and any submission to jurisdiction provisions\n\n" +
"Deliver the summary inline in your chat response — do NOT call generate_docx. Only produce a downloadable Word document if the user explicitly asks for one.",
},
{
id: "builtin-sha-summary",
title: "Shareholder Agreement Summary",
prompt_md:
"## Shareholder Agreement Summary\n\n" +
"Review the uploaded shareholder agreement and produce a comprehensive legal summary covering the following topics. " +
"For each section, identify the key provisions, quote the relevant clause references, and flag any unusual, onerous, or market-standard deviations.\n\n" +
"1. **Parties & Shareholdings** — Full legal names, roles, share classes held, and percentage interests (on a fully diluted basis if stated)\n" +
"2. **Share Classes & Rights** — For each class: voting rights, dividend rights, liquidation preference, conversion or redemption features\n" +
"3. **Board Composition & Governance** — Board size, director appointment rights (and the shareholding thresholds required to maintain them), quorum, and casting vote\n" +
"4. **Reserved Matters** — Decisions requiring a special majority, unanimity, or a specific shareholder's consent; note the threshold and whose consent is required for each\n" +
"5. **Pre-emption on New Shares** — Who holds pre-emption rights, procedure, timeline, and any carve-outs (e.g. employee option schemes)\n" +
"6. **Transfer Restrictions** — Lock-up periods, prohibited transfers, permitted transfers (e.g. to affiliates), and any board or shareholder approval requirements\n" +
"7. **Right of First Refusal / Pre-emption on Transfer** — Trigger, procedure, pricing mechanics, and any exceptions\n" +
"8. **Drag-Along Rights** — Who holds the right, threshold to trigger, conditions (e.g. minimum price, independent valuation), and minority protections\n" +
"9. **Tag-Along Rights** — Who holds the right, triggering threshold, exercise procedure, and price terms\n" +
"10. **Anti-Dilution Protections** — Type (full ratchet, weighted average), trigger events, calculation mechanics, and exceptions\n" +
"11. **Dividend Policy** — Any obligation or target to pay dividends, preferential dividend rights, and restrictions on distributions\n" +
"12. **Exit & Liquidity** — Agreed exit routes (trade sale, IPO, drag sale), timelines, and liquidation preferences on exit\n" +
"13. **Deadlock** — Deadlock definition, escalation and resolution mechanisms (e.g. Russian roulette, put/call options), and consequences if unresolved\n" +
"14. **Non-Compete & Non-Solicitation** — Who is bound, scope of activities and geography, duration, and carve-outs\n" +
"15. **Governing Law & Dispute Resolution** — Applicable law, forum, arbitration or litigation, and any mandatory escalation steps\n\n" +
"Generate the summary as a downloadable Word document.",
},
];

2838
backend/src/lib/chatTools.ts Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,59 @@
import { promisify } from "util";
import JSZip from "jszip";
/** Cached promisified `libreoffice-convert` converter; null until first use. */
let _convert:
  | ((buf: Buffer, ext: string, filter: undefined) => Promise<Buffer>)
  | null = null;

/**
 * Return the promisified LibreOffice `convert` function. The
 * `libreoffice-convert` module is imported on the first call only and the
 * wrapped function is cached for later calls.
 */
async function getConvert() {
  if (_convert) return _convert;
  const { default: libre } = await import("libreoffice-convert");
  _convert = promisify(libre.convert.bind(libre));
  return _convert;
}
/**
 * Rewrite zip entries whose names use backslash separators (for example
 * `word\document.xml`) to the forward-slash form, so that consumers which
 * look entries up by exact name can find them.
 *
 * @param buffer - Raw archive bytes.
 * @returns The original buffer when it cannot be opened as a zip or when
 *   no entry name contains a backslash; otherwise a regenerated archive
 *   with the affected file entries renamed.
 */
export async function normalizeDocxZipPaths(buffer: Buffer): Promise<Buffer> {
  let archive: JSZip;
  try {
    archive = await JSZip.loadAsync(buffer);
  } catch {
    // Unreadable as a zip: hand the input back untouched.
    return buffer;
  }

  const backslashed: string[] = [];
  archive.forEach((entryPath) => {
    if (entryPath.includes("\\")) backslashed.push(entryPath);
  });
  if (backslashed.length === 0) return buffer;

  for (const oldName of backslashed) {
    const file = archive.file(oldName);
    // Names that do not resolve to a file entry are left as they are.
    if (!file) continue;
    const bytes = await file.async("nodebuffer");
    archive.remove(oldName);
    archive.file(oldName.replace(/\\/g, "/"), bytes);
  }
  return archive.generateAsync({ type: "nodebuffer" });
}
/**
 * Convert a DOCX/DOC buffer to PDF through LibreOffice.
 *
 * The input is first passed through `normalizeDocxZipPaths`, then handed
 * to the converter with the `.pdf` target extension. Rejects when the
 * converter cannot be loaded or the conversion fails.
 */
export async function docxToPdf(buffer: Buffer): Promise<Buffer> {
  const toPdf = await getConvert();
  const cleaned = await normalizeDocxZipPaths(buffer);
  const pdf = toPdf(cleaned, ".pdf", undefined);
  return pdf;
}
/**
 * Build the storage key for a converted PDF:
 * `converted-pdfs/<userId>/<docId>.pdf`.
 */
export function convertedPdfKey(userId: string, docId: string): string {
  const segments = ["converted-pdfs", userId, `${docId}.pdf`];
  return segments.join("/");
}

View file

@ -0,0 +1,159 @@
import type { createServerSupabase } from "./supabase";
/** Supabase client type: whatever `createServerSupabase` returns. */
type Supa = ReturnType<typeof createServerSupabase>;
/**
 * Minimal document row accepted by the helpers in this file. Only `id` is
 * required; any other column is allowed through the index signature.
 */
interface DocRow {
id: string;
latest_version_number?: number | null;
[k: string]: unknown;
}
/**
 * Document row extended with fields describing its active version.
 */
interface VersionPathRow extends DocRow {
/** Set from document_versions.storage_path of the active version. */
storage_path?: string | null;
/** Set from document_versions.pdf_storage_path of the active version. */
pdf_storage_path?: string | null;
current_version_id?: string | null;
/** Set from document_versions.version_number of the active version. */
active_version_number?: number | null;
}
/**
 * A resolved document version. `storage_path` is always present; the other
 * descriptive fields may be null.
 */
export interface ActiveVersion {
id: string;
storage_path: string;
pdf_storage_path: string | null;
version_number: number | null;
display_name: string | null;
source: string | null;
}
/**
 * Resolve the storage paths for a document.
 *
 * Uses the version identified by `versionId` when a non-empty string is
 * given; otherwise falls back to `documents.current_version_id`. The
 * `documents` table is only queried for that fallback, so an explicit
 * `versionId` costs a single round-trip.
 *
 * @param documentId - Document the version must belong to.
 * @param db - Supabase client.
 * @param versionId - Optional explicit version id.
 * @returns The version's details, or null when no version id can be
 *   determined, the version row is missing, it belongs to a different
 *   document, or it has no `storage_path`.
 */
export async function loadActiveVersion(
  documentId: string,
  db: Supa,
  versionId?: string | null,
): Promise<ActiveVersion | null> {
  let targetVersionId: string | null =
    typeof versionId === "string" && versionId ? versionId : null;

  if (!targetVersionId) {
    const { data: doc } = await db
      .from("documents")
      .select("current_version_id")
      .eq("id", documentId)
      .single();
    targetVersionId = (doc?.current_version_id as string | undefined) || null;
  }
  if (!targetVersionId) return null;

  const { data: v } = await db
    .from("document_versions")
    .select(
      "id, document_id, storage_path, pdf_storage_path, version_number, display_name, source",
    )
    .eq("id", targetVersionId)
    .single();
  // Reject versions owned by another document, and rows with no stored file.
  if (!v || v.document_id !== documentId || !v.storage_path) return null;

  return {
    id: v.id as string,
    storage_path: v.storage_path as string,
    pdf_storage_path: (v.pdf_storage_path as string | null) ?? null,
    version_number: (v.version_number as number | null) ?? null,
    display_name: (v.display_name as string | null) ?? null,
    source: (v.source as string | null) ?? null,
  };
}
/**
 * For a list of documents, look up each one's active version and merge
 * `storage_path`, `pdf_storage_path` and `active_version_number` onto the
 * row. Rows are mutated in place and the same array is returned.
 *
 * At most one query is issued regardless of list size; none when no row
 * has a `current_version_id`. Rows without a resolvable version get null
 * for all three fields in every case, so callers see a uniform shape.
 *
 * @param db - Supabase client.
 * @param docs - Rows to annotate.
 * @returns `docs`, annotated.
 */
export async function attachActiveVersionPaths<T extends VersionPathRow>(
  db: Supa,
  docs: T[],
): Promise<T[]> {
  if (docs.length === 0) return docs;

  type VersionFields = {
    storage_path: string | null;
    pdf_storage_path: string | null;
    version_number: number | null;
  };

  // De-duplicate ids and drop empty strings before querying.
  const versionIds = [
    ...new Set(
      docs
        .map((d) => d.current_version_id)
        .filter((id): id is string => typeof id === "string" && id !== ""),
    ),
  ];

  const byId = new Map<string, VersionFields>();
  if (versionIds.length > 0) {
    const { data: rows } = await db
      .from("document_versions")
      .select("id, storage_path, pdf_storage_path, version_number")
      .in("id", versionIds);
    for (const r of (rows ?? []) as ({ id: string } & VersionFields)[]) {
      byId.set(r.id, {
        storage_path: r.storage_path ?? null,
        pdf_storage_path: r.pdf_storage_path ?? null,
        version_number: r.version_number ?? null,
      });
    }
  }

  for (const d of docs) {
    const v = d.current_version_id ? byId.get(d.current_version_id) : undefined;
    d.storage_path = v?.storage_path ?? null;
    d.pdf_storage_path = v?.pdf_storage_path ?? null;
    d.active_version_number = v?.version_number ?? null;
  }
  return docs;
}
/**
 * Given a list of document rows, attach `latest_version_number`: the
 * highest `version_number` among that document's `assistant_edit`
 * versions, or null when it has none. Rows are mutated in place and the
 * same array is returned. One query is issued regardless of list size
 * (none for an empty list).
 *
 * The maximum is tracked without a numeric seed, so any non-null
 * `version_number` (including 0) counts as a version.
 *
 * @param db - Supabase client.
 * @param docs - Rows to annotate.
 * @returns `docs`, annotated.
 */
export async function attachLatestVersionNumbers<T extends DocRow>(
  db: Supa,
  docs: T[],
): Promise<T[]> {
  if (docs.length === 0) return docs;
  const ids = docs.map((d) => d.id);
  const { data: rows } = await db
    .from("document_versions")
    .select("document_id, version_number")
    .in("document_id", ids)
    .eq("source", "assistant_edit")
    .not("version_number", "is", null);

  const latestByDoc = new Map<string, number>();
  for (const r of (rows ?? []) as {
    document_id: string;
    version_number: number | null;
  }[]) {
    if (r.version_number == null) continue;
    const prev = latestByDoc.get(r.document_id);
    if (prev === undefined || r.version_number > prev) {
      latestByDoc.set(r.document_id, r.version_number);
    }
  }

  for (const d of docs) {
    d.latest_version_number = latestByDoc.get(d.id) ?? null;
  }
  return docs;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,78 @@
import crypto from "crypto";
/**
* HMAC-signed, non-expiring download tokens.
*
* The token encodes the R2 storage path + filename; the backend route
* `/download/:token` validates the signature and streams the file. This
* gives persistent links safe to store in chat history without signed-URL
* expiry or R2 CORS headaches.
*/
/**
 * Resolve the HMAC key used to sign download tokens.
 *
 * Preference order: `DOWNLOAD_SIGNING_SECRET`, then `SUPABASE_SECRET_KEY`,
 * then the literal `"dev-secret"`. A variable that is unset, empty, or
 * whitespace-only is skipped, so a blank value can never become the key.
 * A usable value is returned exactly as configured (not trimmed).
 *
 * NOTE(review): the `"dev-secret"` fallback means tokens are forgeable
 * whenever neither variable is configured — confirm deployments always set
 * one of them.
 */
function getSecret(): string {
  const candidates = [
    process.env.DOWNLOAD_SIGNING_SECRET,
    process.env.SUPABASE_SECRET_KEY,
  ];
  for (const candidate of candidates) {
    if (candidate && candidate.trim() !== "") return candidate;
  }
  return "dev-secret";
}
/**
 * Encode bytes as URL-safe base64: `+` becomes `-`, `/` becomes `_`, and
 * trailing `=` padding is removed.
 */
function b64urlEncode(buf: Buffer): string {
  const standard = buf.toString("base64");
  const urlSafe = standard.replace(/[+/]/g, (ch) => (ch === "+" ? "-" : "_"));
  return urlSafe.replace(/=+$/g, "");
}
/**
 * Decode URL-safe base64 back to bytes: restores `+` and `/`, re-adds the
 * `=` padding needed to reach a multiple of four characters, then decodes.
 */
function b64urlDecode(s: string): Buffer {
  const standard = s.replace(/-/g, "+").replace(/_/g, "/");
  const missing = (4 - (standard.length % 4)) % 4;
  return Buffer.from(standard + "=".repeat(missing), "base64");
}
/**
 * Constant-time string equality.
 *
 * Lengths are compared on the encoded bytes, not on the strings:
 * `crypto.timingSafeEqual` throws when its two inputs differ in byte
 * length, and strings of equal `.length` can encode to different byte
 * lengths once non-ASCII characters are involved. Comparing byte lengths
 * first makes such input return false rather than throw.
 *
 * @returns true only when both strings encode to identical UTF-8 bytes.
 */
function timingSafeEqStr(a: string, b: string): boolean {
  const bytesA = Buffer.from(a, "utf8");
  const bytesB = Buffer.from(b, "utf8");
  if (bytesA.length !== bytesB.length) return false;
  return crypto.timingSafeEqual(bytesA, bytesB);
}
/**
 * Create a signed download token for a storage path and filename.
 *
 * The token has the form `<payload>.<signature>`: the payload is the
 * base64url-encoded JSON `{ p: path, f: filename }`, and the signature is
 * the base64url-encoded HMAC-SHA256 of that encoded payload, keyed with
 * `getSecret()`.
 */
export function signDownload(path: string, filename: string): string {
  const json = JSON.stringify({ p: path, f: filename });
  const body = b64urlEncode(Buffer.from(json, "utf8"));
  const mac = crypto.createHmac("sha256", getSecret());
  mac.update(body);
  return [body, b64urlEncode(mac.digest())].join(".");
}
/**
 * Validate a download token and extract its contents.
 *
 * The token must consist of exactly two dot-separated parts. The signature
 * is recomputed over the payload part and compared with the supplied one;
 * only then is the payload decoded and parsed.
 *
 * @returns `{ path, filename }` for a valid token, or null when the shape
 *   is wrong, the signature does not match, the payload is not valid JSON,
 *   or either field is missing/empty.
 */
export function verifyDownload(
  token: string,
): { path: string; filename: string } | null {
  const pieces = token.split(".");
  if (pieces.length !== 2) return null;
  const [body, givenSig] = pieces;

  const mac = crypto.createHmac("sha256", getSecret()).update(body).digest();
  const signatureMatches = timingSafeEqStr(givenSig, b64urlEncode(mac));
  if (!signatureMatches) return null;

  let decoded: { p: string; f: string };
  try {
    decoded = JSON.parse(b64urlDecode(body).toString("utf8")) as {
      p: string;
      f: string;
    };
  } catch {
    return null;
  }
  if (!decoded?.p || !decoded?.f) return null;
  return { path: decoded.p, filename: decoded.f };
}
/**
 * Build a relative download URL of the form `/download/<token>`, where the
 * token comes from `signDownload(path, filename)`. Being relative, the URL
 * needs a base prepended by whoever renders it (per the original note, the
 * frontend uses NEXT_PUBLIC_API_BASE_URL).
 */
export function buildDownloadUrl(path: string, filename: string): string {
  const token = signDownload(path, filename);
  return "/download/" + token;
}

View file

@ -0,0 +1,172 @@
import Anthropic from "@anthropic-ai/sdk";
import type { Tool } from "@anthropic-ai/sdk/resources/messages/messages";
import * as fs from "fs";
import * as path from "path";
import type {
StreamChatParams,
StreamChatResult,
NormalizedToolCall,
NormalizedToolResult,
} from "./types";
import { toClaudeTools } from "./tools";
/**
 * Absolute path of the raw-stream log file: `claude-raw-stream.log`
 * resolved against the process working directory.
 */
const RAW_STREAM_LOG_PATH = path.resolve(
process.cwd(),
"claude-raw-stream.log",
);
/**
 * Content block shapes handled in this file: text, tool_use, or any other
 * block type, which is kept as an opaque record.
 */
type ContentBlock =
| { type: "text"; text: string }
| { type: "tool_use"; id: string; name: string; input: unknown }
| { type: string; [key: string]: unknown };
/** A chat message whose content is a plain string or a list of content blocks. */
type NativeMessage = {
role: "user" | "assistant";
content: string | ContentBlock[];
};
/** `max_tokens` value sent with each streaming request. */
const MAX_TOKENS = 16384;
/**
 * Create an Anthropic client. The key is the trimmed `override` when that
 * is non-empty, else `ANTHROPIC_API_KEY` from the environment, else the
 * empty string.
 */
function client(override?: string | null): Anthropic {
  const trimmed = override?.trim();
  const apiKey = trimmed ? trimmed : process.env.ANTHROPIC_API_KEY || "";
  return new Anthropic({ apiKey });
}
/**
 * Copy the caller's messages into `NativeMessage` objects, keeping only
 * `role` and `content` from each entry.
 */
function toNativeMessages(
  messages: StreamChatParams["messages"],
): NativeMessage[] {
  const native: NativeMessage[] = [];
  for (const { role, content } of messages) {
    native.push({ role, content });
  }
  return native;
}
/**
 * Stream a chat completion from Claude, driving a tool-call loop.
 *
 * Each iteration opens one streaming request with the messages accumulated
 * so far, forwards text deltas (and, when `enableThinking` is set, thinking
 * deltas) to `callbacks`, then inspects the final message. When the model
 * stopped with `tool_use`, at least one tool call was found, and `runTools`
 * was supplied, the tools are run and their results appended as a user turn
 * before the next iteration; otherwise the loop ends. At most
 * `maxIterations` (default 10) requests are made.
 *
 * @returns `fullText`: the concatenation of every text block from each
 *   iteration's final message.
 */
export async function streamClaude(
params: StreamChatParams,
): Promise<StreamChatResult> {
const {
model,
systemPrompt,
tools = [],
callbacks = {},
runTools,
apiKeys,
enableThinking,
} = params;
const maxIter = params.maxIterations ?? 10;
const anthropic = client(apiKeys?.claude);
const claudeTools = toClaudeTools(tools);
const messages: NativeMessage[] = toNativeMessages(params.messages);
let fullText = "";
for (let iter = 0; iter < maxIter; iter++) {
const stream = anthropic.messages.stream({
model,
system: systemPrompt,
messages: messages as Anthropic.MessageParam[],
// Omit `tools` entirely when there are none to offer.
tools: claudeTools.length
? (claudeTools as unknown as Tool[])
: undefined,
max_tokens: MAX_TOKENS,
// Claude 4.x models require `thinking.type: "adaptive"` and
// drive effort via `output_config.effort` rather than a fixed
// token budget. We only opt in when the caller requested it.
...(enableThinking
? ({
thinking: { type: "adaptive" },
output_config: { effort: "high" },
} as unknown as Record<string, unknown>)
: {}),
// Extended thinking requires temperature to be default (omitted).
});
let sawThinking = false;
// NOTE(review): every raw stream event is printed to stdout and appended
// to RAW_STREAM_LOG_PATH unconditionally, and append errors are ignored.
// This looks like debugging output — confirm it should ship enabled,
// since the events carry the model's output.
stream.on("streamEvent", (event) => {
const line = JSON.stringify(event);
console.log("[claude raw stream]", line);
fs.appendFile(RAW_STREAM_LOG_PATH, line + "\n", () => {});
});
stream.on("text", (delta) => {
callbacks.onContentDelta?.(delta);
});
if (enableThinking) {
stream.on("thinking", (delta) => {
sawThinking = true;
callbacks.onReasoningDelta?.(delta);
});
}
const final = await stream.finalMessage();
// Signal the end of reasoning only if thinking deltas were seen this turn.
if (sawThinking) callbacks.onReasoningBlockEnd?.();
const stopReason = final.stop_reason;
const assistantBlocks = final.content as ContentBlock[];
// Extract text content and tool_use calls from the final assistant
// message so we can accumulate text and drive the tool-call loop.
const toolCalls: NormalizedToolCall[] = [];
for (const block of assistantBlocks) {
if (block.type === "text") {
const txt = (block as { text: string }).text;
if (typeof txt === "string") fullText += txt;
} else if (block.type === "tool_use") {
const tu = block as {
id: string;
name: string;
input: unknown;
};
const call: NormalizedToolCall = {
id: tu.id,
name: tu.name,
input: (tu.input as Record<string, unknown>) ?? {},
};
callbacks.onToolCallStart?.(call);
toolCalls.push(call);
}
}
// Stop unless the model asked for tools and we are able to run them.
if (stopReason !== "tool_use" || !toolCalls.length || !runTools) {
break;
}
const results = await runTools(toolCalls);
// Record the assistant turn (preserving the original content blocks,
// which Claude requires on the follow-up) and the user turn that
// carries the tool_result blocks.
messages.push({ role: "assistant", content: assistantBlocks });
messages.push({
role: "user",
content: results.map((r) => ({
type: "tool_result",
tool_use_id: r.tool_use_id,
content: r.content,
})),
});
}
return { fullText };
}
/**
 * Run a single non-streaming Claude request with one user message and
 * return the text of the reply.
 *
 * @param params.maxTokens - Output cap; defaults to 512.
 * @returns All text blocks of the response joined together (blocks of any
 *   other type are ignored).
 */
export async function completeClaudeText(params: {
  model: string;
  systemPrompt?: string;
  user: string;
  maxTokens?: number;
  apiKeys?: { claude?: string | null };
}): Promise<string> {
  const { model, systemPrompt, user, maxTokens, apiKeys } = params;
  const anthropic = client(apiKeys?.claude);
  const resp = await anthropic.messages.create({
    model,
    max_tokens: maxTokens ?? 512,
    system: systemPrompt,
    messages: [{ role: "user", content: user }],
  });

  let text = "";
  for (const block of resp.content) {
    if (block.type === "text") text += block.text;
  }
  return text;
}
// Helper re-export for callers wanting to hand normalized results back in.
export type { NormalizedToolResult };

View file

@ -0,0 +1,162 @@
import { GoogleGenAI } from "@google/genai";
import type {
StreamChatParams,
StreamChatResult,
NormalizedToolCall,
} from "./types";
import { toGeminiTools } from "./tools";
/**
 * One part of a Gemini content turn as used in this file: text (optionally
 * flagged as a thought), a function call, or a function response.
 */
type GeminiPart = {
text?: string;
// Set by Gemini when the text content is a thought summary rather than
// final-answer prose. Requires `thinkingConfig.includeThoughts: true`.
thought?: boolean;
functionCall?: { id?: string; name: string; args?: Record<string, unknown> };
functionResponse?: {
id?: string;
name: string;
response: Record<string, unknown>;
};
// Gemini 3 returns a thoughtSignature on parts that contain reasoning or
// a functionCall. It must be echoed back verbatim on the same part when
// we replay the model's turn, or the API rejects the next call.
thoughtSignature?: string;
};
/** A conversation turn: the speaker role plus its ordered parts. */
type GeminiContent = {
role: "user" | "model";
parts: GeminiPart[];
};
/**
 * Create a Google GenAI client. The key is the trimmed `override` when
 * that is non-empty, else `GEMINI_API_KEY` from the environment, else the
 * empty string.
 */
function client(override?: string | null): GoogleGenAI {
  const trimmed = override?.trim();
  const apiKey = trimmed ? trimmed : process.env.GEMINI_API_KEY || "";
  return new GoogleGenAI({ apiKey });
}
/**
 * Convert the caller's messages into Gemini content turns: the
 * `assistant` role maps to `model`, every other role maps to `user`, and
 * the message content becomes a single text part.
 */
function toNativeContents(messages: StreamChatParams["messages"]): GeminiContent[] {
  const contents: GeminiContent[] = [];
  for (const message of messages) {
    const role = message.role === "assistant" ? "model" : "user";
    contents.push({ role, parts: [{ text: message.content }] });
  }
  return contents;
}
/**
 * Stream a chat completion from Gemini, driving a tool-call loop.
 *
 * Each iteration opens one streaming request with the contents accumulated
 * so far and walks the chunks: thought text goes to `onReasoningDelta`,
 * other text to `onContentDelta`, and each function call is reported via
 * `onToolCallStart`. When function calls were seen and `runTools` was
 * supplied, the tools are run, the model turn and the function responses
 * are appended, and the loop continues; otherwise it ends. At most
 * `maxIterations` (default 10) requests are made.
 *
 * @returns `fullText`: all non-thought text from every iteration,
 *   concatenated.
 */
export async function streamGemini(
params: StreamChatParams,
): Promise<StreamChatResult> {
const { model, systemPrompt, tools = [], callbacks = {}, runTools, apiKeys, enableThinking } = params;
const maxIter = params.maxIterations ?? 10;
const ai = client(apiKeys?.gemini);
const functionDeclarations = toGeminiTools(tools);
const contents: GeminiContent[] = toNativeContents(params.messages);
let fullText = "";
for (let iter = 0; iter < maxIter; iter++) {
const stream = await ai.models.generateContentStream({
model,
contents: contents as never,
config: {
systemInstruction: systemPrompt,
// Omit `tools` entirely when there are no declarations to offer.
tools: functionDeclarations.length
? [{ functionDeclarations } as never]
: undefined,
// When enabled, ask Gemini to surface thought summaries.
// When disabled, explicitly zero the thinking budget so the
// model skips thinking entirely (saves tokens and latency
// for bulk extraction jobs).
thinkingConfig: enableThinking
? { includeThoughts: true }
: { thinkingBudget: 0 },
},
});
// Per-iteration accumulators.
const textParts: string[] = [];
const callParts: GeminiPart[] = [];
const toolCalls: NormalizedToolCall[] = [];
let sawThinking = false;
for await (const chunk of stream) {
// NOTE(review): every chunk is pretty-printed to stdout unconditionally.
// This looks like debugging output — confirm it should ship enabled.
console.log("[gemini stream chunk]", JSON.stringify(chunk, null, 2));
// Only the first candidate's parts are read.
const parts =
(chunk as { candidates?: { content?: { parts?: GeminiPart[] } }[] })
.candidates?.[0]?.content?.parts ?? [];
for (const part of parts) {
if (part.text) {
if (part.thought) {
sawThinking = true;
callbacks.onReasoningDelta?.(part.text);
} else {
textParts.push(part.text);
callbacks.onContentDelta?.(part.text);
}
}
if (part.functionCall) {
// Preserve the whole part (including thoughtSignature)
// so it can be echoed verbatim in the replay turn.
callParts.push(part);
// When Gemini supplies no id, synthesize `<name>-<index>` so the call
// can still be matched to its result below.
const call: NormalizedToolCall = {
id: part.functionCall.id ?? `${part.functionCall.name}-${toolCalls.length}`,
name: part.functionCall.name,
input: part.functionCall.args ?? {},
};
callbacks.onToolCallStart?.(call);
toolCalls.push(call);
}
}
}
if (sawThinking) callbacks.onReasoningBlockEnd?.();
fullText += textParts.join("");
// Stop unless the model asked for tools and we are able to run them.
if (!toolCalls.length || !runTools) {
break;
}
const results = await runTools(toolCalls);
// Append the model's turn (text + functionCall parts, in that order)
// and the matching functionResponse turn.
const modelParts: GeminiPart[] = [];
if (textParts.length) modelParts.push({ text: textParts.join("") });
for (const cp of callParts) modelParts.push(cp);
contents.push({ role: "model", parts: modelParts });
contents.push({
role: "user",
parts: results.map((r) => {
const match = toolCalls.find((c) => c.id === r.tool_use_id);
return {
functionResponse: {
// The id is echoed only when it does not start with the tool name,
// which is how synthesized ids are recognised here.
// NOTE(review): a genuine Gemini id that happens to begin with the
// function name would also be dropped — confirm that cannot occur,
// or track synthesized ids explicitly.
...(r.tool_use_id && !r.tool_use_id.startsWith(match?.name ?? "")
? { id: r.tool_use_id }
: {}),
name: match?.name ?? "tool",
response: { output: r.content },
},
};
}),
});
}
return { fullText };
}
/**
 * Run a single non-streaming Gemini request with one user message and
 * return the response text (empty string when the response has none).
 * The system instruction is only sent when `systemPrompt` is non-empty.
 *
 * NOTE(review): unlike `completeClaudeText`, this signature has no
 * `maxTokens` and no output cap is applied, so a `maxTokens` passed to
 * `completeText` has no effect for Gemini models — confirm that is
 * intended.
 */
export async function completeGeminiText(params: {
  model: string;
  systemPrompt?: string;
  user: string;
  apiKeys?: { gemini?: string | null };
}): Promise<string> {
  const { model, systemPrompt, user, apiKeys } = params;
  const ai = client(apiKeys?.gemini);
  const config = systemPrompt ? { systemInstruction: systemPrompt } : undefined;
  const resp = await ai.models.generateContent({
    model,
    contents: [{ role: "user", parts: [{ text: user }] }],
    config,
  });
  return resp.text ?? "";
}

View file

@ -0,0 +1,27 @@
import { streamClaude, completeClaudeText } from "./claude";
import { streamGemini, completeGeminiText } from "./gemini";
import { providerForModel } from "./models";
import type { StreamChatParams, StreamChatResult, UserApiKeys } from "./types";
export * from "./types";
export * from "./models";
/**
 * Provider-agnostic streaming entry point: picks the implementation from
 * the model id. Claude models go to `streamClaude`; everything else that
 * `providerForModel` accepts goes to `streamGemini`.
 */
export async function streamChatWithTools(
  params: StreamChatParams,
): Promise<StreamChatResult> {
  switch (providerForModel(params.model)) {
    case "claude":
      return streamClaude(params);
    default:
      return streamGemini(params);
  }
}
/**
 * Provider-agnostic one-shot text completion: picks the implementation
 * from the model id. Claude models go to `completeClaudeText`; everything
 * else that `providerForModel` accepts goes to `completeGeminiText`. The
 * `params` object is forwarded unchanged.
 */
export async function completeText(params: {
  model: string;
  systemPrompt?: string;
  user: string;
  maxTokens?: number;
  apiKeys?: UserApiKeys;
}): Promise<string> {
  const isClaude = providerForModel(params.model) === "claude";
  return isClaude ? completeClaudeText(params) : completeGeminiText(params);
}

View file

@ -0,0 +1,48 @@
import type { Provider } from "./types";
// ---------------------------------------------------------------------------
// Canonical model IDs
// ---------------------------------------------------------------------------
// Main-chat tier (top-end) — user picks one of these per message.
export const CLAUDE_MAIN_MODELS = ["claude-opus-4-7", "claude-sonnet-4-6"] as const;
export const GEMINI_MAIN_MODELS = [
"gemini-3.1-pro-preview",
"gemini-3-flash-preview",
] as const;
// Mid-tier (used for tabular review) — user picks one in account settings.
export const CLAUDE_MID_MODELS = ["claude-sonnet-4-6"] as const;
export const GEMINI_MID_MODELS = ["gemini-3-flash-preview"] as const;
// Low-tier (used for title generation, lightweight extractions) — user picks
// one in account settings.
export const CLAUDE_LOW_MODELS = ["claude-haiku-4-5"] as const;
export const GEMINI_LOW_MODELS = ["gemini-3.1-flash-lite-preview"] as const;
// Default model id for each use (main chat, title generation, tabular
// review). Each default also appears in one of the Gemini tier lists above.
export const DEFAULT_MAIN_MODEL = "gemini-3-flash-preview";
export const DEFAULT_TITLE_MODEL = "gemini-3.1-flash-lite-preview";
export const DEFAULT_TABULAR_MODEL = "gemini-3-flash-preview";
// Union of every tier list above; ids appearing in several tiers collapse to
// one entry. Used by `resolveModel` to validate a requested id.
const ALL_MODELS = new Set<string>([
...CLAUDE_MAIN_MODELS,
...GEMINI_MAIN_MODELS,
...CLAUDE_MID_MODELS,
...GEMINI_MID_MODELS,
...CLAUDE_LOW_MODELS,
...GEMINI_LOW_MODELS,
]);
// ---------------------------------------------------------------------------
// Provider inference
// ---------------------------------------------------------------------------
/**
 * Infer the provider from a model id by its prefix.
 *
 * @param model Model id such as `claude-…` or `gemini-…`.
 * @returns `"claude"` or `"gemini"`, whichever the id starts with.
 * @throws Error when the id starts with neither prefix.
 */
export function providerForModel(model: string): Provider {
  // Prefixes are checked in the same order as before: claude, then gemini.
  for (const provider of ["claude", "gemini"] as const) {
    if (model.startsWith(provider)) return provider;
  }
  throw new Error(`Unknown model id: ${model}`);
}
/**
 * Validate a stored or requested model id against the known-model set.
 *
 * @param id Candidate model id; may be null, undefined or empty.
 * @param fallback Value returned when `id` is missing or not recognised.
 * @returns `id` when it is a member of `ALL_MODELS`, otherwise `fallback`.
 */
export function resolveModel(id: string | null | undefined, fallback: string): string {
  const recognised = Boolean(id) && ALL_MODELS.has(id as string);
  return recognised ? (id as string) : fallback;
}

View file

@ -0,0 +1,74 @@
import type { OpenAIToolSchema } from "./types";
// ---------------------------------------------------------------------------
// Tool-schema adapters
// ---------------------------------------------------------------------------
// Callers hand us OpenAI-style tool definitions. Provider-specific converters
// live here so the rest of the code never has to think about it.
/**
 * Tool definition in the shape produced by `toClaudeTools`.
 * `input_schema` holds the normalised JSON-Schema-style parameter description
 * taken from the OpenAI-style tool's `function.parameters`.
 */
export type ClaudeTool = {
name: string;
description: string;
input_schema: Record<string, unknown>;
};
/**
 * Convert OpenAI-style tool definitions into `ClaudeTool` objects.
 *
 * Name and description are copied as-is; the parameter schema is passed
 * through `normalizeSchema` and stored under `input_schema`.
 *
 * @param tools OpenAI-style tool definitions.
 * @returns One `ClaudeTool` per input tool, in the same order.
 */
export function toClaudeTools(tools: OpenAIToolSchema[]): ClaudeTool[] {
  const converted: ClaudeTool[] = [];
  for (const { function: fn } of tools) {
    converted.push({
      name: fn.name,
      description: fn.description,
      input_schema: normalizeSchema(fn.parameters),
    });
  }
  return converted;
}
/**
 * Function declaration in the shape produced by `toGeminiTools`.
 * `parameters` is optional: the converter leaves it out when the tool's
 * normalised schema declares no properties.
 */
export type GeminiFunctionDeclaration = {
name: string;
description: string;
parameters?: Record<string, unknown>;
};
/**
 * Convert OpenAI-style tool definitions into Gemini function declarations.
 *
 * @param tools OpenAI-style tool definitions.
 * @returns One declaration per input tool, in the same order. `parameters` is
 *   only set when the normalised schema declares at least one property.
 */
export function toGeminiTools(tools: OpenAIToolSchema[]): GeminiFunctionDeclaration[] {
  return tools.map(({ function: fn }) => {
    const schema = normalizeSchema(fn.parameters);
    const declaration: GeminiFunctionDeclaration = {
      name: fn.name,
      description: fn.description,
    };
    // Gemini rejects `{ type: "object", properties: {} }` with no fields
    // present, so the `parameters` key is left off entirely in that case.
    const declaredProps =
      (schema as { properties?: Record<string, unknown> }).properties ?? {};
    if (Object.keys(declaredProps).length > 0) {
      declaration.parameters = schema;
    }
    return declaration;
  });
}
// ---------------------------------------------------------------------------
// Schema normalization
// ---------------------------------------------------------------------------
// The OpenAI tool schemas in the codebase already use plain JSON-Schema-lite
// shape. Both Claude and Gemini accept that shape. We only sanitise a couple
// of edge cases: `integer` is accepted by both, but we make sure objects
// always carry a `properties` map and we recurse into array `items` (when
// present) so Gemini doesn't error.
/**
 * Return a sanitised shallow copy of a JSON-Schema-style node.
 *
 * - Anything that is not a non-null object becomes an empty object schema.
 * - For `type: "object"`, `properties` is always present and every property
 *   schema is normalised recursively.
 * - For `type: "array"`, `items` is normalised recursively when it is set;
 *   a missing `items` is left missing.
 * - All other keys are copied through unchanged. The input is not mutated.
 */
function normalizeSchema(schema: unknown): Record<string, unknown> {
  if (typeof schema !== "object" || schema === null) {
    return { type: "object", properties: {} };
  }
  const source = schema as Record<string, unknown>;
  const normalized: Record<string, unknown> = { ...source };
  switch (source.type) {
    case "object": {
      const declared = (source.properties as Record<string, unknown>) ?? {};
      const cleaned: Record<string, unknown> = {};
      for (const key of Object.keys(declared)) {
        cleaned[key] = normalizeSchema(declared[key]);
      }
      normalized.properties = cleaned;
      break;
    }
    case "array": {
      if (source.items) {
        normalized.items = normalizeSchema(source.items);
      }
      break;
    }
  }
  return normalized;
}

View file

@ -0,0 +1,64 @@
// Shared types for the LLM provider adapter.
// Callers always speak OpenAI-style tools + { role, content } messages; each
// provider translates internally.
/** Identifier of a supported LLM backend. */
export type Provider = "claude" | "gemini";
/**
 * Tool definition in the OpenAI function-calling shape. This is the format
 * callers use; provider adapters translate it internally.
 */
export type OpenAIToolSchema = {
type: "function";
function: {
// Tool name as exposed to the model.
name: string;
// Human-readable explanation of what the tool does.
description: string;
// JSON-Schema-style description of the tool's arguments.
parameters: Record<string, unknown>;
};
};
/** A single chat message in the provider-agnostic `{ role, content }` form. */
export type LlmMessage = {
role: "user" | "assistant";
content: string;
};
/**
 * A tool invocation in provider-independent form.
 * NOTE(review): `id` presumably pairs with `NormalizedToolResult.tool_use_id`
 * — confirm against the provider adapters.
 */
export type NormalizedToolCall = {
id: string;
name: string;
input: Record<string, unknown>;
};
/**
 * Result of running a tool, in provider-independent form.
 * NOTE(review): `tool_use_id` presumably echoes the `id` of the
 * `NormalizedToolCall` it answers — confirm against the provider adapters.
 */
export type NormalizedToolResult = {
tool_use_id: string;
content: string;
};
/**
 * Optional hooks a caller can supply to observe a streaming chat turn.
 * Every hook is optional. The exact firing order and granularity are decided
 * by the provider adapters (not visible here).
 */
export type StreamCallbacks = {
// Receives a chunk of reasoning/thinking text.
onReasoningDelta?: (text: string) => void;
// Signals that a reasoning block has finished.
onReasoningBlockEnd?: () => void;
// Receives a chunk of assistant answer text.
onContentDelta?: (text: string) => void;
// Receives a tool call the model has started.
onToolCallStart?: (call: NormalizedToolCall) => void;
};
/**
 * Per-user provider API keys. Each entry may be absent or null when the user
 * has not stored a key for that provider.
 */
export type UserApiKeys = {
claude?: string | null;
gemini?: string | null;
};
/** Provider-agnostic request for a streaming chat turn. */
export type StreamChatParams = {
// Model id; the provider is inferred from it.
model: string;
systemPrompt: string;
messages: LlmMessage[];
// OpenAI-style tool definitions; adapters convert them per provider.
tools?: OpenAIToolSchema[];
// NOTE(review): presumably caps the number of model/tool round-trips —
// confirm in the provider adapters.
maxIterations?: number;
callbacks?: StreamCallbacks;
// Executes the tool calls requested by the model and returns their results.
runTools?: (calls: NormalizedToolCall[]) => Promise<NormalizedToolResult[]>;
apiKeys?: UserApiKeys;
/**
* Enable provider-side reasoning/thinking. Off by default — should only
* be turned on for interactive chat surfaces where the user actually
* benefits from seeing the thought stream. Bulk extraction jobs and
* one-shot completions should leave this off to save tokens and latency.
*/
enableThinking?: boolean;
};
/**
 * Value resolved by a streaming chat turn.
 * NOTE(review): `fullText` is presumably the concatenated assistant text —
 * confirm in the provider adapters.
 */
export type StreamChatResult = {
fullText: string;
};

185
backend/src/lib/storage.ts Normal file
View file

@ -0,0 +1,185 @@
/**
* Cloudflare R2 storage utilities for Mike document management.
* R2 is S3-compatible, so this module uses @aws-sdk/client-s3.
*
* Required env vars:
* R2_ENDPOINT_URL — https://<account-id>.r2.cloudflarestorage.com
* R2_ACCESS_KEY_ID — R2 API token (Access Key ID)
* R2_SECRET_ACCESS_KEY — R2 API token (Secret Access Key)
* R2_BUCKET_NAME — bucket name (default: "mike")
*/
import {
S3Client,
PutObjectCommand,
GetObjectCommand,
DeleteObjectCommand,
} from "@aws-sdk/client-s3";
import { getSignedUrl as awsGetSignedUrl } from "@aws-sdk/s3-request-presigner";
/**
 * Build an S3 client pointed at the R2 endpoint from the environment.
 *
 * A fresh client is created on every call. The environment variables are
 * read without validation (non-null assertions), so callers are expected to
 * have checked that storage is configured.
 */
function getClient(): S3Client {
  const endpoint = process.env.R2_ENDPOINT_URL!;
  const credentials = {
    accessKeyId: process.env.R2_ACCESS_KEY_ID!,
    secretAccessKey: process.env.R2_SECRET_ACCESS_KEY!,
  };
  return new S3Client({ region: "auto", endpoint, credentials });
}
/** Target bucket; falls back to "mike" when R2_BUCKET_NAME is unset. */
const BUCKET = process.env.R2_BUCKET_NAME ?? "mike";

/**
 * True only when the endpoint and both credential variables are set to
 * non-empty values. Evaluated once at module load.
 */
export const storageEnabled = [
  process.env.R2_ENDPOINT_URL,
  process.env.R2_ACCESS_KEY_ID,
  process.env.R2_SECRET_ACCESS_KEY,
].every(Boolean);
// ---------------------------------------------------------------------------
// Upload
// ---------------------------------------------------------------------------
/**
 * Store an object in the bucket under `key`.
 *
 * Unlike the download/delete helpers this does not consult `storageEnabled`;
 * any error from the S3 client propagates to the caller.
 *
 * @param key Object key inside the bucket.
 * @param content Raw bytes to store.
 * @param contentType MIME type recorded on the object.
 */
export async function uploadFile(
  key: string,
  content: ArrayBuffer,
  contentType: string,
): Promise<void> {
  const client = getClient();
  const putCommand = new PutObjectCommand({
    Bucket: BUCKET,
    Key: key,
    Body: Buffer.from(content),
    ContentType: contentType,
  });
  await client.send(putCommand);
}
// ---------------------------------------------------------------------------
// Download
// ---------------------------------------------------------------------------
/**
 * Fetch an object's bytes from the bucket.
 *
 * Best-effort: returns `null` when storage is not configured, when the
 * response has no body, or when the request fails for any reason (missing
 * key, network error, bad credentials). Callers cannot distinguish these.
 *
 * @param key Object key inside the bucket.
 * @returns The object's bytes, or `null` if it could not be read.
 */
export async function downloadFile(key: string): Promise<ArrayBuffer | null> {
  if (!storageEnabled) return null;
  try {
    const client = getClient();
    const response = await client.send(
      new GetObjectCommand({ Bucket: BUCKET, Key: key }),
    );
    if (!response.Body) return null;
    const bytes = await response.Body.transformToByteArray();
    // A Uint8Array is a *view*: its backing buffer may be larger than the
    // view (non-zero byteOffset or shorter byteLength, e.g. pooled buffers).
    // Returning `bytes.buffer` unconditionally could leak unrelated bytes.
    const coversWholeBuffer =
      bytes.byteOffset === 0 && bytes.byteLength === bytes.buffer.byteLength;
    if (coversWholeBuffer) {
      // Common case: no copy needed.
      return bytes.buffer as ArrayBuffer;
    }
    return bytes.buffer.slice(
      bytes.byteOffset,
      bytes.byteOffset + bytes.byteLength,
    ) as ArrayBuffer;
  } catch {
    // Deliberate best-effort: any failure is reported as "not available".
    return null;
  }
}
// ---------------------------------------------------------------------------
// Delete
// ---------------------------------------------------------------------------
/**
 * Remove an object from the bucket.
 *
 * Does nothing when storage is not configured. Errors from the S3 client
 * propagate to the caller.
 *
 * @param key Object key inside the bucket.
 */
export async function deleteFile(key: string): Promise<void> {
  if (storageEnabled) {
    const client = getClient();
    const deleteCommand = new DeleteObjectCommand({ Bucket: BUCKET, Key: key });
    await client.send(deleteCommand);
  }
}
// ---------------------------------------------------------------------------
// Signed URL (pre-signed for temporary direct access)
// ---------------------------------------------------------------------------
/**
 * Create a pre-signed GET URL for temporary direct access to an object.
 *
 * @param key Object key inside the bucket.
 * @param expiresIn Value passed to the presigner as `expiresIn`
 *   (default 3600).
 * @param downloadFilename When given, the signed response carries an
 *   `attachment` Content-Disposition with this filename.
 * @returns The signed URL, or `null` when storage is not configured or
 *   signing fails.
 */
export async function getSignedUrl(
  key: string,
  expiresIn = 3600,
  downloadFilename?: string,
): Promise<string | null> {
  if (!storageEnabled) return null;
  try {
    const client = getClient();
    // The browser would otherwise name the download after the last path
    // segment of the R2 key (which contains the document UUID). The
    // `download` attribute on <a> is ignored for cross-origin URLs, so the
    // filename has to be forced through the response header instead.
    let disposition: string | undefined;
    if (downloadFilename) {
      disposition = buildContentDisposition("attachment", downloadFilename);
    }
    const getCommand = new GetObjectCommand({
      Bucket: BUCKET,
      Key: key,
      ResponseContentDisposition: disposition,
    });
    const url = await awsGetSignedUrl(client, getCommand, { expiresIn });
    return url;
  } catch {
    return null;
  }
}
/**
 * Produce a safe display filename for downloads.
 *
 * Trims surrounding whitespace, substitutes "download" for an empty name, and
 * replaces control characters and path separators (`/`, `\`) with `_`.
 * Non-ASCII characters are preserved.
 */
export function normalizeDownloadFilename(name: string): string {
  const trimmed = name.trim();
  const base = trimmed || "download";
  return base.replace(/[\x00-\x1F\x7F]/g, "_").replace(/[\\/]/g, "_");
}

/**
 * Produce the value for the quoted `filename="…"` parameter of a
 * Content-Disposition header.
 *
 * On top of `normalizeDownloadFilename`, this replaces `"` and `\` (which
 * would break the quoted-string) and every character outside printable ASCII
 * with `_`. Non-ASCII text is not reliably representable in the plain
 * `filename` parameter, and Node rejects header values containing characters
 * above U+00FF. The `filename*` parameter is the place for the full Unicode
 * name. Replacement is per code point, so an astral character becomes a
 * single `_`.
 */
export function sanitizeDispositionFilename(name: string): string {
  return normalizeDownloadFilename(name)
    .replace(/["\\]/g, "_")
    .replace(/[^\x20-\x7E]/gu, "_");
}
/**
 * Percent-encode a string for use as an RFC 5987 `ext-value`
 * (e.g. the part after `UTF-8''` in `filename*=`).
 *
 * `encodeURIComponent` leaves `'`, `(`, `)` and `*` unescaped, but they are
 * not allowed unescaped in an `ext-value`, so they are percent-encoded here.
 *
 * Lone (unpaired) UTF-16 surrogates are replaced with U+FFFD first;
 * `encodeURIComponent` would otherwise throw a `URIError` on them.
 *
 * @param str Text to encode.
 * @returns The percent-encoded text.
 */
export function encodeRFC5987(str: string): string {
  // High surrogate not followed by a low one, or low surrogate not preceded
  // by a high one.
  const wellFormed = str.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    "\uFFFD",
  );
  return encodeURIComponent(wellFormed).replace(
    /['()*]/g,
    (c) => "%" + c.charCodeAt(0).toString(16).toUpperCase(),
  );
}
/**
 * Build a Content-Disposition header value carrying both filename forms.
 *
 * The result is `<kind>; filename="…"; filename*=UTF-8''…`: the quoted
 * parameter comes from `sanitizeDispositionFilename`, the extended parameter
 * from `encodeRFC5987`, both applied to the normalised filename.
 *
 * @param kind Disposition type.
 * @param filename Desired filename; normalised before use.
 */
export function buildContentDisposition(
  kind: "inline" | "attachment",
  filename: string,
): string {
  const normalized = normalizeDownloadFilename(filename);
  const quotedParam = `filename="${sanitizeDispositionFilename(normalized)}"`;
  const extendedParam = `filename*=UTF-8''${encodeRFC5987(normalized)}`;
  return [kind, quotedParam, extendedParam].join("; ");
}
// ---------------------------------------------------------------------------
// Storage key helpers
// ---------------------------------------------------------------------------
/**
 * Key of a document's original upload:
 * `documents/<userId>/<docId>/source<ext>`, where `<ext>` is taken from
 * `filename` via `storageExtension` with ".bin" as the fallback.
 */
export function storageKey(
  userId: string,
  docId: string,
  filename: string,
): string {
  const extension = storageExtension(filename, ".bin");
  return ["documents", userId, docId, `source${extension}`].join("/");
}
/**
 * Key of a PDF stored alongside a document:
 * `documents/<userId>/<docId>/<stem>.pdf`. `stem` is used verbatim.
 */
export function pdfStorageKey(
  userId: string,
  docId: string,
  stem: string,
): string {
  return ["documents", userId, docId, `${stem}.pdf`].join("/");
}
/**
 * Key of a generated document:
 * `generated/<userId>/<docId>/generated<ext>`, where `<ext>` is taken from
 * `filename` via `storageExtension` with ".docx" as the fallback.
 */
export function generatedDocKey(
  userId: string,
  docId: string,
  filename: string,
): string {
  const extension = storageExtension(filename, ".docx");
  return ["generated", userId, docId, `generated${extension}`].join("/");
}
/**
 * Key of a stored document version:
 * `documents/<userId>/<docId>/versions/<versionSlug><ext>`, where `<ext>` is
 * taken from `filename` via `storageExtension` with ".bin" as the fallback.
 */
export function versionStorageKey(
  userId: string,
  docId: string,
  versionSlug: string,
  filename: string,
): string {
  const extension = storageExtension(filename, ".bin");
  return ["documents", userId, docId, "versions", `${versionSlug}${extension}`].join("/");
}
/**
 * Derive a safe, lower-cased file extension (including the dot) from a name.
 *
 * The text from the last `.` onward is accepted only if it is a dot followed
 * by 1–16 ASCII letters or digits; otherwise `fallback` is returned. A name
 * with no dot also yields `fallback`.
 */
function storageExtension(filename: string, fallback: string): string {
  const dotIndex = filename.lastIndexOf(".");
  if (dotIndex === -1) {
    return fallback;
  }
  const candidate = filename.substring(dotIndex).toLowerCase();
  if (/^\.[a-z0-9]{1,16}$/.test(candidate)) {
    return candidate;
  }
  return fallback;
}

View file

@ -0,0 +1,41 @@
import { createClient } from "@supabase/supabase-js";
/**
 * Server-side Supabase client using the service role key.
 * Bypasses RLS — only use in API routes after verifying the user.
 *
 * Reads SUPABASE_URL and SUPABASE_SECRET_KEY from the environment (an unset
 * variable is passed on as an empty string) and disables session persistence.
 */
export function createServerSupabase() {
  const { SUPABASE_URL, SUPABASE_SECRET_KEY } = process.env;
  return createClient(SUPABASE_URL || "", SUPABASE_SECRET_KEY || "", {
    auth: { persistSession: false },
  });
}
/**
 * Extract and verify the Supabase JWT from the Authorization header.
 *
 * The `Bearer` scheme is matched case-insensitively (HTTP authentication
 * scheme names are case-insensitive), and an empty token is rejected without
 * a round-trip to Supabase.
 *
 * @param req Incoming request carrying `Authorization: Bearer <jwt>`.
 * @returns The authenticated user's UUID string.
 * @throws Response with status 401 when the header is missing or malformed,
 *   or when Supabase does not return a user for the token.
 * @throws Response with status 500 when SUPABASE_URL or SUPABASE_SECRET_KEY
 *   is not configured.
 */
export async function getUserIdFromRequest(req: Request): Promise<string> {
  const auth = req.headers.get("authorization") ?? "";
  // "Bearer " is 7 characters; everything after it is the token.
  const token = /^bearer /i.test(auth) ? auth.slice(7).trim() : "";
  if (!token) {
    throw new Response("Missing or invalid Authorization header", {
      status: 401,
    });
  }
  const supabaseUrl = process.env.SUPABASE_URL || "";
  const serviceKey = process.env.SUPABASE_SECRET_KEY || "";
  if (!supabaseUrl || !serviceKey) {
    throw new Response("Server auth is not configured", { status: 500 });
  }
  const admin = createClient(supabaseUrl, serviceKey, {
    auth: { persistSession: false },
  });
  // A failed lookup leaves `data.user` null, which is reported as 401 below.
  const { data } = await admin.auth.getUser(token);
  if (!data.user) {
    throw new Response("Invalid or expired token", { status: 401 });
  }
  return data.user.id;
}

36
backend/src/lib/upload.ts Normal file
View file

@ -0,0 +1,36 @@
import type { RequestHandler } from "express";
import multer from "multer";
/** Largest accepted upload, in bytes (100 MiB). */
export const MAX_UPLOAD_SIZE_BYTES = 100 * 1024 ** 2;

/** The same limit in whole mebibytes, for user-facing messages. */
export const MAX_UPLOAD_SIZE_MB = Math.round(MAX_UPLOAD_SIZE_BYTES / 1024 / 1024);
/**
 * Shared multer instance: files are buffered in memory, at most one file per
 * request, each no larger than MAX_UPLOAD_SIZE_BYTES.
 */
const memoryUpload = multer({
  limits: { files: 1, fileSize: MAX_UPLOAD_SIZE_BYTES },
  storage: multer.memoryStorage(),
});
/**
 * Express middleware that parses a single uploaded file from `fieldName`
 * and turns multer's own errors into JSON responses.
 *
 * - No error: continues to the next handler.
 * - `MulterError` with code `LIMIT_FILE_SIZE`: responds 413.
 * - Any other `MulterError`: responds 400 with multer's message.
 * - Any other error: forwarded to Express via `next(err)`.
 *
 * @param fieldName Name of the multipart form field holding the file.
 */
export function singleFileUpload(fieldName: string): RequestHandler {
  return (req, res, next) => {
    const parseUpload = memoryUpload.single(fieldName);
    parseUpload(req, res, (err) => {
      if (!err) {
        next();
        return;
      }
      if (!(err instanceof multer.MulterError)) {
        next(err);
        return;
      }
      const tooLarge = err.code === "LIMIT_FILE_SIZE";
      const detail = tooLarge
        ? `File too large. Maximum size is ${MAX_UPLOAD_SIZE_MB} MB.`
        : `Upload failed: ${err.message}`;
      res.status(tooLarge ? 413 : 400).json({ detail });
    });
  };
}

View file

@ -0,0 +1,62 @@
import { createServerSupabase } from "./supabase";
import {
resolveModel,
DEFAULT_TITLE_MODEL,
DEFAULT_TABULAR_MODEL,
type UserApiKeys,
} from "./llm";
/**
 * Per-user model configuration as returned by `getUserModelSettings`:
 * the model ids to use for title generation and tabular review, plus the
 * user's stored provider API keys.
 */
export type UserModelSettings = {
title_model: string;
tabular_model: string;
api_keys: UserApiKeys;
};
/**
 * Pick the model used for title generation.
 *
 * Titles are a lightweight task, so the cheapest model of whichever provider
 * the user has a key for is used: the default (Gemini Flash Lite) when a
 * Gemini key is present, Claude Haiku when only a Claude key is present.
 * With no user keys at all the Gemini default is returned (the dev-mode env
 * fallback). Keys consisting only of whitespace count as absent.
 */
function resolveTitleModel(apiKeys: UserApiKeys): string {
  const hasKey = (key?: string | null): boolean => Boolean(key?.trim());
  if (!hasKey(apiKeys.gemini) && hasKey(apiKeys.claude)) {
    return "claude-haiku-4-5";
  }
  return DEFAULT_TITLE_MODEL;
}
/**
 * Load a user's model preferences and API keys from `user_profiles`.
 *
 * A missing row or missing columns degrade to defaults: keys become `null`,
 * the tabular model falls back to DEFAULT_TABULAR_MODEL, and the title model
 * is derived from whichever keys are present.
 *
 * @param userId Value matched against the `user_id` column.
 * @param db Optional Supabase client; a server client is created when omitted.
 */
export async function getUserModelSettings(
  userId: string,
  db?: ReturnType<typeof createServerSupabase>,
): Promise<UserModelSettings> {
  const client = db ?? createServerSupabase();
  const { data: profile } = await client
    .from("user_profiles")
    .select("tabular_model, claude_api_key, gemini_api_key")
    .eq("user_id", userId)
    .single();
  const api_keys: UserApiKeys = {
    claude: profile?.claude_api_key ?? null,
    gemini: profile?.gemini_api_key ?? null,
  };
  const title_model = resolveTitleModel(api_keys);
  const tabular_model = resolveModel(profile?.tabular_model, DEFAULT_TABULAR_MODEL);
  return { title_model, tabular_model, api_keys };
}
/**
 * Load only a user's stored provider API keys from `user_profiles`.
 *
 * A missing row or missing column yields `null` for the affected key.
 *
 * @param userId Value matched against the `user_id` column.
 * @param db Optional Supabase client; a server client is created when omitted.
 */
export async function getUserApiKeys(
  userId: string,
  db?: ReturnType<typeof createServerSupabase>,
): Promise<UserApiKeys> {
  const client = db ?? createServerSupabase();
  const { data: profile } = await client
    .from("user_profiles")
    .select("claude_api_key, gemini_api_key")
    .eq("user_id", userId)
    .single();
  const claude = profile?.claude_api_key ?? null;
  const gemini = profile?.gemini_api_key ?? null;
  return { claude, gemini };
}