Add local repo contents

2026-06-30 21:59:37 +02:00 · 2026-04-29 19:49:06 +02:00 · 2026-04-29 19:49:06 +02:00 · d9690965b5
commit d9690965b5
parent 65739ef1ce
176 changed files with 68998 additions and 0 deletions
--- a/backend/src/lib/llm/claude.ts
+++ b/backend/src/lib/llm/claude.ts
@ -0,0 +1,172 @@
+import Anthropic from "@anthropic-ai/sdk";
+import type { Tool } from "@anthropic-ai/sdk/resources/messages/messages";
+import * as fs from "fs";
+import * as path from "path";
+import type {
+    StreamChatParams,
+    StreamChatResult,
+    NormalizedToolCall,
+    NormalizedToolResult,
+} from "./types";
+import { toClaudeTools } from "./tools";
+
+// Absolute path of the debug log that receives raw Claude stream events,
+// resolved against the working directory when this module is loaded.
+const RAW_STREAM_LOG_PATH = path.resolve(
+    process.cwd(),
+    "claude-raw-stream.log",
+);
+
+// A content block of a Claude message as this adapter handles it: plain
+// text, a tool invocation, or any other block type, which the catch-all
+// member lets us carry through without inspecting it.
+type ContentBlock =
+    | { type: "text"; text: string }
+    | { type: "tool_use"; id: string; name: string; input: unknown }
+    | { type: string; [key: string]: unknown };
+
+// One conversation turn in the shape handed to the Anthropic client: the
+// content is either a plain string or a list of content blocks.
+type NativeMessage = {
+    role: "user" | "assistant";
+    content: string | ContentBlock[];
+};
+
+// Value sent as `max_tokens` on every streaming request.
+const MAX_TOKENS = 16384;
+
+/**
+ * Builds an Anthropic SDK client.
+ *
+ * The API key is `override` when it is non-empty after trimming, otherwise
+ * `process.env.ANTHROPIC_API_KEY`, otherwise the empty string.
+ */
+function client(override?: string | null): Anthropic {
+    const userKey = override?.trim();
+    const envKey = process.env.ANTHROPIC_API_KEY;
+    return new Anthropic({ apiKey: userKey || envKey || "" });
+}
+
+/**
+ * Copies the caller's `{ role, content }` messages into fresh
+ * `NativeMessage` objects so the tool loop can append to the list without
+ * touching the caller's array.
+ */
+function toNativeMessages(
+    messages: StreamChatParams["messages"],
+): NativeMessage[] {
+    const native: NativeMessage[] = [];
+    for (const { role, content } of messages) {
+        native.push({ role, content });
+    }
+    return native;
+}
+
+/**
+ * Streams a chat completion from Claude and drives the tool-call loop until
+ * the model stops asking for tools or `maxIterations` (default 10) requests
+ * have been made.
+ *
+ * Text deltas are forwarded to `callbacks.onContentDelta`. When
+ * `enableThinking` is set, thinking deltas go to `callbacks.onReasoningDelta`
+ * and `callbacks.onReasoningBlockEnd` fires once after each request that
+ * produced any. Every `tool_use` block of a finished assistant message is
+ * announced through `callbacks.onToolCallStart`; when the stop reason is
+ * `tool_use` and `runTools` is provided, the calls are executed and their
+ * results are sent back as `tool_result` blocks on the next request.
+ *
+ * Raw stream events are dumped (to stdout and to `RAW_STREAM_LOG_PATH`) only
+ * when the `CLAUDE_RAW_STREAM_LOG` environment variable is `"1"`. The dump
+ * repeats the model's whole output and the file grows without bound, so it
+ * has to be an explicit debugging opt-in rather than the default.
+ *
+ * @param params Model id, system prompt, messages, tools and callbacks.
+ * @returns The text of every assistant message produced during the loop,
+ *          concatenated in order.
+ */
+export async function streamClaude(
+    params: StreamChatParams,
+): Promise<StreamChatResult> {
+    const {
+        model,
+        systemPrompt,
+        tools = [],
+        callbacks = {},
+        runTools,
+        apiKeys,
+        enableThinking,
+    } = params;
+    const maxIter = params.maxIterations ?? 10;
+    const anthropic = client(apiKeys?.claude);
+    const claudeTools = toClaudeTools(tools);
+
+    // Debugging aid only; see the function comment for why it is opt-in.
+    const logRawStream = process.env.CLAUDE_RAW_STREAM_LOG === "1";
+    let warnedAboutLogFailure = false;
+
+    const messages: NativeMessage[] = toNativeMessages(params.messages);
+    let fullText = "";
+
+    for (let iter = 0; iter < maxIter; iter++) {
+        const stream = anthropic.messages.stream({
+            model,
+            system: systemPrompt,
+            messages: messages as Anthropic.MessageParam[],
+            tools: claudeTools.length
+                ? (claudeTools as unknown as Tool[])
+                : undefined,
+            max_tokens: MAX_TOKENS,
+            // Claude 4.x models require `thinking.type: "adaptive"` and
+            // drive effort via `output_config.effort` rather than a fixed
+            // token budget. We only opt in when the caller requested it.
+            ...(enableThinking
+                ? ({
+                      thinking: { type: "adaptive" },
+                      output_config: { effort: "high" },
+                  } as unknown as Record<string, unknown>)
+                : {}),
+            // Extended thinking requires temperature to be default (omitted).
+        });
+
+        let sawThinking = false;
+
+        if (logRawStream) {
+            stream.on("streamEvent", (event) => {
+                const line = JSON.stringify(event);
+                console.log("[claude raw stream]", line);
+                fs.appendFile(RAW_STREAM_LOG_PATH, line + "\n", (err) => {
+                    // The dump stays best-effort, but a failure is reported
+                    // once instead of being dropped silently on every event.
+                    if (err && !warnedAboutLogFailure) {
+                        warnedAboutLogFailure = true;
+                        console.warn(
+                            "[claude raw stream] could not append to log file:",
+                            err.message,
+                        );
+                    }
+                });
+            });
+        }
+
+        stream.on("text", (delta) => {
+            callbacks.onContentDelta?.(delta);
+        });
+        if (enableThinking) {
+            stream.on("thinking", (delta) => {
+                sawThinking = true;
+                callbacks.onReasoningDelta?.(delta);
+            });
+        }
+
+        const final = await stream.finalMessage();
+        if (sawThinking) callbacks.onReasoningBlockEnd?.();
+        const stopReason = final.stop_reason;
+        const assistantBlocks = final.content as ContentBlock[];
+
+        // Extract text content and tool_use calls from the final assistant
+        // message so we can accumulate text and drive the tool-call loop.
+        const toolCalls: NormalizedToolCall[] = [];
+        for (const block of assistantBlocks) {
+            if (block.type === "text") {
+                const txt = (block as { text: string }).text;
+                if (typeof txt === "string") fullText += txt;
+            } else if (block.type === "tool_use") {
+                const tu = block as {
+                    id: string;
+                    name: string;
+                    input: unknown;
+                };
+                const call: NormalizedToolCall = {
+                    id: tu.id,
+                    name: tu.name,
+                    input: (tu.input as Record<string, unknown>) ?? {},
+                };
+                callbacks.onToolCallStart?.(call);
+                toolCalls.push(call);
+            }
+        }
+
+        // Nothing left to execute, or nobody able to execute it: we are done.
+        if (stopReason !== "tool_use" || !toolCalls.length || !runTools) {
+            break;
+        }
+
+        const results = await runTools(toolCalls);
+
+        // Record the assistant turn (preserving the original content blocks,
+        // which Claude requires on the follow-up) and the user turn that
+        // carries the tool_result blocks.
+        messages.push({ role: "assistant", content: assistantBlocks });
+        messages.push({
+            role: "user",
+            content: results.map((r) => ({
+                type: "tool_result",
+                tool_use_id: r.tool_use_id,
+                content: r.content,
+            })),
+        });
+    }
+
+    return { fullText };
+}
+
+/**
+ * One-shot, non-streaming Claude completion for a single user message.
+ *
+ * `maxTokens` defaults to 512. Only `text` blocks of the response contribute
+ * to the result; they are concatenated in order.
+ */
+export async function completeClaudeText(params: {
+    model: string;
+    systemPrompt?: string;
+    user: string;
+    maxTokens?: number;
+    apiKeys?: { claude?: string | null };
+}): Promise<string> {
+    const { model, systemPrompt, user, maxTokens, apiKeys } = params;
+    const anthropic = client(apiKeys?.claude);
+    const response = await anthropic.messages.create({
+        model,
+        max_tokens: maxTokens ?? 512,
+        system: systemPrompt,
+        messages: [{ role: "user", content: user }],
+    });
+
+    let text = "";
+    for (const block of response.content) {
+        if (block.type === "text") text += block.text;
+    }
+    return text;
+}
+
+// Helper re-export for callers wanting to hand normalized results back in.
+export type { NormalizedToolResult };
--- a/backend/src/lib/llm/gemini.ts
+++ b/backend/src/lib/llm/gemini.ts
@ -0,0 +1,162 @@
+import { GoogleGenAI } from "@google/genai";
+import type {
+    StreamChatParams,
+    StreamChatResult,
+    NormalizedToolCall,
+} from "./types";
+import { toGeminiTools } from "./tools";
+
+// One part of a Gemini content turn, as far as this adapter reads or writes
+// it: text, a function call requested by the model, or a function response
+// that we send back.
+type GeminiPart = {
+    text?: string;
+    // Set by Gemini when the text content is a thought summary rather than
+    // final-answer prose. Requires `thinkingConfig.includeThoughts: true`.
+    thought?: boolean;
+    functionCall?: { id?: string; name: string; args?: Record<string, unknown> };
+    functionResponse?: {
+        id?: string;
+        name: string;
+        response: Record<string, unknown>;
+    };
+    // Gemini 3 returns a thoughtSignature on parts that contain reasoning or
+    // a functionCall. It must be echoed back verbatim on the same part when
+    // we replay the model's turn, or the API rejects the next call.
+    thoughtSignature?: string;
+};
+
+// One conversation turn in Gemini's vocabulary; the assistant side is
+// called "model" there.
+type GeminiContent = {
+    role: "user" | "model";
+    parts: GeminiPart[];
+};
+
+/**
+ * Builds a Google GenAI client.
+ *
+ * The API key is `override` when it is non-empty after trimming, otherwise
+ * `process.env.GEMINI_API_KEY`, otherwise the empty string.
+ */
+function client(override?: string | null): GoogleGenAI {
+    const userKey = override?.trim();
+    const envKey = process.env.GEMINI_API_KEY;
+    return new GoogleGenAI({ apiKey: userKey || envKey || "" });
+}
+
+/**
+ * Translates `{ role, content }` messages into Gemini contents: the
+ * "assistant" role becomes "model", every other role becomes "user", and the
+ * content string is wrapped in a single text part.
+ */
+function toNativeContents(messages: StreamChatParams["messages"]): GeminiContent[] {
+    const contents: GeminiContent[] = [];
+    for (const message of messages) {
+        const role = message.role === "assistant" ? "model" : "user";
+        contents.push({ role, parts: [{ text: message.content }] });
+    }
+    return contents;
+}
+
+/**
+ * Streams a chat completion from Gemini and drives the tool-call loop until
+ * the model stops asking for tools or `maxIterations` (default 10) requests
+ * have been made.
+ *
+ * Text parts flagged as thoughts are forwarded to
+ * `callbacks.onReasoningDelta` (followed by one `onReasoningBlockEnd` per
+ * request that produced any); all other text goes to
+ * `callbacks.onContentDelta`. Every function call is announced through
+ * `callbacks.onToolCallStart`; when `runTools` is provided, the calls are
+ * executed and their results are sent back as `functionResponse` parts.
+ *
+ * Raw chunks are printed to stdout only when the `GEMINI_RAW_STREAM_LOG`
+ * environment variable is `"1"`, because the dump repeats the model's whole
+ * output on every request.
+ *
+ * @param params Model id, system prompt, messages, tools and callbacks.
+ * @returns The non-thought text of every model turn, concatenated in order.
+ */
+export async function streamGemini(
+    params: StreamChatParams,
+): Promise<StreamChatResult> {
+    const { model, systemPrompt, tools = [], callbacks = {}, runTools, apiKeys, enableThinking } = params;
+    const maxIter = params.maxIterations ?? 10;
+    const ai = client(apiKeys?.gemini);
+    const functionDeclarations = toGeminiTools(tools);
+
+    // Debugging aid only; see the function comment for why it is opt-in.
+    const logRawStream = process.env.GEMINI_RAW_STREAM_LOG === "1";
+
+    const contents: GeminiContent[] = toNativeContents(params.messages);
+    let fullText = "";
+
+    for (let iter = 0; iter < maxIter; iter++) {
+        const stream = await ai.models.generateContentStream({
+            model,
+            contents: contents as never,
+            config: {
+                systemInstruction: systemPrompt,
+                tools: functionDeclarations.length
+                    ? [{ functionDeclarations } as never]
+                    : undefined,
+                // When enabled, ask Gemini to surface thought summaries.
+                // When disabled, explicitly zero the thinking budget so the
+                // model skips thinking entirely (saves tokens and latency
+                // for bulk extraction jobs).
+                // NOTE(review): confirm that every model routed here accepts
+                // a zero thinking budget.
+                thinkingConfig: enableThinking
+                    ? { includeThoughts: true }
+                    : { thinkingBudget: 0 },
+            },
+        });
+
+        // Per-iteration accumulators.
+        const textParts: string[] = [];
+        const callParts: GeminiPart[] = [];
+        const toolCalls: NormalizedToolCall[] = [];
+        // Ids that Gemini itself attached to a function call. Only these may
+        // be echoed back; ids we made up locally mean nothing to the API.
+        const providerIds = new Set<string>();
+        let sawThinking = false;
+
+        for await (const chunk of stream) {
+            if (logRawStream) {
+                console.log("[gemini stream chunk]", JSON.stringify(chunk, null, 2));
+            }
+            const parts =
+                (chunk as { candidates?: { content?: { parts?: GeminiPart[] } }[] })
+                    .candidates?.[0]?.content?.parts ?? [];
+
+            for (const part of parts) {
+                if (part.text) {
+                    if (part.thought) {
+                        sawThinking = true;
+                        callbacks.onReasoningDelta?.(part.text);
+                    } else {
+                        textParts.push(part.text);
+                        callbacks.onContentDelta?.(part.text);
+                    }
+                }
+                if (part.functionCall) {
+                    // Preserve the whole part (including thoughtSignature)
+                    // so it can be echoed verbatim in the replay turn.
+                    callParts.push(part);
+                    const providerId = part.functionCall.id;
+                    if (providerId) providerIds.add(providerId);
+                    const call: NormalizedToolCall = {
+                        // Fall back to a locally unique id when Gemini sent none.
+                        id: providerId ?? `${part.functionCall.name}-${toolCalls.length}`,
+                        name: part.functionCall.name,
+                        input: part.functionCall.args ?? {},
+                    };
+                    callbacks.onToolCallStart?.(call);
+                    toolCalls.push(call);
+                }
+            }
+        }
+
+        if (sawThinking) callbacks.onReasoningBlockEnd?.();
+
+        fullText += textParts.join("");
+
+        // Nothing left to execute, or nobody able to execute it: we are done.
+        if (!toolCalls.length || !runTools) {
+            break;
+        }
+
+        const results = await runTools(toolCalls);
+
+        // Append the model's turn (text + functionCall parts, in that order)
+        // and the matching functionResponse turn.
+        const modelParts: GeminiPart[] = [];
+        if (textParts.length) modelParts.push({ text: textParts.join("") });
+        for (const cp of callParts) modelParts.push(cp);
+        contents.push({ role: "model", parts: modelParts });
+
+        contents.push({
+            role: "user",
+            parts: results.map((r) => {
+                const match = toolCalls.find((c) => c.id === r.tool_use_id);
+                return {
+                    functionResponse: {
+                        // Echo the id only when Gemini issued it. Guessing
+                        // from the id's prefix would drop a genuine id that
+                        // happens to start with the function name.
+                        ...(providerIds.has(r.tool_use_id)
+                            ? { id: r.tool_use_id }
+                            : {}),
+                        name: match?.name ?? "tool",
+                        response: { output: r.content },
+                    },
+                };
+            }),
+        });
+    }
+
+    return { fullText };
+}
+
+/**
+ * One-shot, non-streaming Gemini completion for a single user message.
+ *
+ * A `config` object is sent only when a system prompt is given; no
+ * output-token cap is set here. Resolves to the response's `text`, or the
+ * empty string when there is none.
+ */
+export async function completeGeminiText(params: {
+    model: string;
+    systemPrompt?: string;
+    user: string;
+    apiKeys?: { gemini?: string | null };
+}): Promise<string> {
+    const { model, systemPrompt, user, apiKeys } = params;
+    const ai = client(apiKeys?.gemini);
+    const config = systemPrompt
+        ? { systemInstruction: systemPrompt }
+        : undefined;
+    const response = await ai.models.generateContent({
+        model,
+        contents: [{ role: "user", parts: [{ text: user }] }],
+        config,
+    });
+    return response.text ?? "";
+}
--- a/backend/src/lib/llm/index.ts
+++ b/backend/src/lib/llm/index.ts
@ -0,0 +1,27 @@
+import { streamClaude, completeClaudeText } from "./claude";
+import { streamGemini, completeGeminiText } from "./gemini";
+import { providerForModel } from "./models";
+import type { StreamChatParams, StreamChatResult, UserApiKeys } from "./types";
+
+export * from "./types";
+export * from "./models";
+
+/**
+ * Provider-agnostic entry point for streaming chat with tools: picks the
+ * adapter from the model id and hands `params` over unchanged.
+ * `providerForModel` throws for ids it does not recognise.
+ */
+export async function streamChatWithTools(
+    params: StreamChatParams,
+): Promise<StreamChatResult> {
+    switch (providerForModel(params.model)) {
+        case "claude":
+            return streamClaude(params);
+        default:
+            return streamGemini(params);
+    }
+}
+
+/**
+ * Provider-agnostic one-shot text completion: picks the adapter from the
+ * model id and hands `params` over unchanged. `providerForModel` throws for
+ * ids it does not recognise.
+ */
+export async function completeText(params: {
+    model: string;
+    systemPrompt?: string;
+    user: string;
+    maxTokens?: number;
+    apiKeys?: UserApiKeys;
+}): Promise<string> {
+    switch (providerForModel(params.model)) {
+        case "claude":
+            return completeClaudeText(params);
+        default:
+            return completeGeminiText(params);
+    }
+}
--- a/backend/src/lib/llm/models.ts
+++ b/backend/src/lib/llm/models.ts
@ -0,0 +1,48 @@
+import type { Provider } from "./types";
+
+// ---------------------------------------------------------------------------
+// Canonical model IDs
+// ---------------------------------------------------------------------------
+// Main-chat tier (top-end) — user picks one of these per message.
+export const CLAUDE_MAIN_MODELS = ["claude-opus-4-7", "claude-sonnet-4-6"] as const;
+export const GEMINI_MAIN_MODELS = [
+    "gemini-3.1-pro-preview",
+    "gemini-3-flash-preview",
+] as const;
+
+// Mid-tier (used for tabular review) — user picks one in account settings.
+export const CLAUDE_MID_MODELS = ["claude-sonnet-4-6"] as const;
+export const GEMINI_MID_MODELS = ["gemini-3-flash-preview"] as const;
+
+// Low-tier (used for title generation, lightweight extractions) — user picks
+// one in account settings.
+export const CLAUDE_LOW_MODELS = ["claude-haiku-4-5"] as const;
+export const GEMINI_LOW_MODELS = ["gemini-3.1-flash-lite-preview"] as const;
+
+// Default model ids. Each one also appears in one of the tier lists above.
+export const DEFAULT_MAIN_MODEL = "gemini-3-flash-preview";
+export const DEFAULT_TITLE_MODEL = "gemini-3.1-flash-lite-preview";
+export const DEFAULT_TABULAR_MODEL = "gemini-3-flash-preview";
+
+// Union of every tier list; ids that sit in several tiers collapse into a
+// single entry. `resolveModel` uses it to tell known ids from unknown ones.
+const ALL_MODELS = new Set<string>([
+    ...CLAUDE_MAIN_MODELS,
+    ...GEMINI_MAIN_MODELS,
+    ...CLAUDE_MID_MODELS,
+    ...GEMINI_MID_MODELS,
+    ...CLAUDE_LOW_MODELS,
+    ...GEMINI_LOW_MODELS,
+]);
+
+// ---------------------------------------------------------------------------
+// Provider inference
+// ---------------------------------------------------------------------------
+
+/**
+ * Infers the provider from the prefix of a model id.
+ *
+ * @throws Error when the id starts with neither "claude" nor "gemini".
+ */
+export function providerForModel(model: string): Provider {
+    for (const provider of ["claude", "gemini"] as const) {
+        if (model.startsWith(provider)) return provider;
+    }
+    throw new Error(`Unknown model id: ${model}`);
+}
+
+/**
+ * Returns `id` when it is one of the ids in `ALL_MODELS`; otherwise (missing,
+ * empty or unknown id) returns `fallback`.
+ */
+export function resolveModel(id: string | null | undefined, fallback: string): string {
+    if (id == null || !ALL_MODELS.has(id)) return fallback;
+    return id;
+}
--- a/backend/src/lib/llm/tools.ts
+++ b/backend/src/lib/llm/tools.ts
@ -0,0 +1,74 @@
+import type { OpenAIToolSchema } from "./types";
+
+// ---------------------------------------------------------------------------
+// Tool-schema adapters
+// ---------------------------------------------------------------------------
+// Callers hand us OpenAI-style tool definitions. Provider-specific converters
+// live here so the rest of the code never has to think about it.
+
+/** Tool definition in the shape this adapter hands to Claude. */
+export type ClaudeTool = {
+    name: string;
+    description: string;
+    input_schema: Record<string, unknown>;
+};
+
+/**
+ * Converts OpenAI-style tool definitions into Claude tools; the parameter
+ * schema is passed through `normalizeSchema` and becomes `input_schema`.
+ */
+export function toClaudeTools(tools: OpenAIToolSchema[]): ClaudeTool[] {
+    const converted: ClaudeTool[] = [];
+    for (const { function: fn } of tools) {
+        converted.push({
+            name: fn.name,
+            description: fn.description,
+            input_schema: normalizeSchema(fn.parameters),
+        });
+    }
+    return converted;
+}
+
+/** Function declaration in the shape this adapter hands to Gemini. */
+export type GeminiFunctionDeclaration = {
+    name: string;
+    description: string;
+    parameters?: Record<string, unknown>;
+};
+
+/**
+ * Converts OpenAI-style tool definitions into Gemini function declarations.
+ * The parameter schema is passed through `normalizeSchema`.
+ */
+export function toGeminiTools(tools: OpenAIToolSchema[]): GeminiFunctionDeclaration[] {
+    return tools.map(({ function: fn }) => {
+        const schema = normalizeSchema(fn.parameters);
+        const properties =
+            (schema as { properties?: Record<string, unknown> }).properties ?? {};
+
+        const declaration: GeminiFunctionDeclaration = {
+            name: fn.name,
+            description: fn.description,
+        };
+        // Gemini rejects `{ type: "object", properties: {} }` with no fields
+        // present; leave the parameters key out entirely when empty.
+        if (Object.keys(properties).length > 0) {
+            declaration.parameters = schema;
+        }
+        return declaration;
+    });
+}
+
+// ---------------------------------------------------------------------------
+// Schema normalization
+// ---------------------------------------------------------------------------
+// The OpenAI tool schemas in the codebase already use plain JSON-Schema-lite
+// shape. Both Claude and Gemini accept that shape. We only sanitise a couple
+// of edge cases: `integer` is accepted by both, so it is left alone, but we
+// make sure every object schema has `properties` and we recurse into nested
+// `properties` and array `items` so Gemini doesn't error. A missing `items`
+// on an array is NOT filled in.
+
+/**
+ * Returns a sanitised shallow copy of a JSON-Schema-like object.
+ *
+ * - Anything that is not a plain schema object (null, a primitive, an array)
+ *   becomes the empty object schema `{ type: "object", properties: {} }`.
+ * - `type: "object"` schemas always get a `properties` map, and every
+ *   property schema is normalised recursively.
+ * - `type: "array"` schemas have their `items` normalised recursively. When
+ *   `items` is a list of schemas (tuple form) each element is normalised and
+ *   the list shape is kept; spreading the list into an object would turn it
+ *   into a meaningless `{ "0": …, "1": … }` schema.
+ * - Every other key is copied through untouched.
+ *
+ * @param schema The schema to normalise; may be any value.
+ * @returns A new object; the input is not mutated.
+ */
+function normalizeSchema(schema: unknown): Record<string, unknown> {
+    if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
+        return { type: "object", properties: {} };
+    }
+    const s = schema as Record<string, unknown>;
+    const type = s.type;
+    const out: Record<string, unknown> = { ...s };
+
+    if (type === "object") {
+        const props = (s.properties as Record<string, unknown>) ?? {};
+        const normProps: Record<string, unknown> = {};
+        for (const [k, v] of Object.entries(props)) {
+            normProps[k] = normalizeSchema(v);
+        }
+        out.properties = normProps;
+    }
+    if (type === "array" && s.items) {
+        out.items = Array.isArray(s.items)
+            ? s.items.map((item: unknown) => normalizeSchema(item))
+            : normalizeSchema(s.items);
+    }
+    return out;
+}
--- a/backend/src/lib/llm/types.ts
+++ b/backend/src/lib/llm/types.ts
@ -0,0 +1,64 @@
+// Shared types for the LLM provider adapter.
+// Callers always speak OpenAI-style tools + { role, content } messages; each
+// provider translates internally.
+
+/** The LLM back ends this adapter can talk to. */
+export type Provider = "claude" | "gemini";
+
+/** Tool definition in the OpenAI function-calling shape, as supplied by callers. */
+export type OpenAIToolSchema = {
+    type: "function";
+    function: {
+        name: string;
+        description: string;
+        parameters: Record<string, unknown>;
+    };
+};
+
+/** One plain-text conversation turn, as supplied by callers. */
+export type LlmMessage = {
+    role: "user" | "assistant";
+    content: string;
+};
+
+/** A tool invocation requested by the model, in provider-neutral form. */
+export type NormalizedToolCall = {
+    id: string;
+    name: string;
+    input: Record<string, unknown>;
+};
+
+/**
+ * The outcome of running one tool call; `tool_use_id` carries the `id` of
+ * the `NormalizedToolCall` it answers.
+ */
+export type NormalizedToolResult = {
+    tool_use_id: string;
+    content: string;
+};
+
+/** Optional hooks a caller can pass to observe a streaming chat. */
+export type StreamCallbacks = {
+    onReasoningDelta?: (text: string) => void;
+    onReasoningBlockEnd?: () => void;
+    onContentDelta?: (text: string) => void;
+    onToolCallStart?: (call: NormalizedToolCall) => void;
+};
+
+/** Per-user API keys; the adapters fall back to environment variables when one is missing. */
+export type UserApiKeys = {
+    claude?: string | null;
+    gemini?: string | null;
+};
+
+/** Everything a streaming chat request needs, independent of the provider. */
+export type StreamChatParams = {
+    model: string;
+    systemPrompt: string;
+    messages: LlmMessage[];
+    tools?: OpenAIToolSchema[];
+    maxIterations?: number;
+    callbacks?: StreamCallbacks;
+    runTools?: (calls: NormalizedToolCall[]) => Promise<NormalizedToolResult[]>;
+    apiKeys?: UserApiKeys;
+    /**
+     * Enable provider-side reasoning/thinking. Off by default — should only
+     * be turned on for interactive chat surfaces where the user actually
+     * benefits from seeing the thought stream. Bulk extraction jobs and
+     * one-shot completions should leave this off to save tokens and latency.
+     */
+    enableThinking?: boolean;
+};
+
+/** Result of a streaming chat: the concatenated answer text. */
+export type StreamChatResult = {
+    fullText: string;
+};